You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/11/21 16:17:17 UTC
svn commit: r1640934 - in /hive/branches/spark: itests/src/test/resources/
ql/src/java/org/apache/hadoop/hive/ql/parse/spark/
ql/src/test/results/clientpositive/spark/
Author: xuefu
Date: Fri Nov 21 15:17:16 2014
New Revision: 1640934
URL: http://svn.apache.org/r1640934
Log:
HIVE-8756: numRows and rawDataSize are not collected by the Spark stats [Spark Branch] (Na via Xuefu)
Added:
hive/branches/spark/ql/src/test/results/clientpositive/spark/stats1.q.out
Modified:
hive/branches/spark/itests/src/test/resources/testconfiguration.properties
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out
Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1640934&r1=1640933&r2=1640934&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Fri Nov 21 15:17:16 2014
@@ -834,6 +834,7 @@ spark.query.files=add_part_multiple.q, \
stats_only_null.q, \
stats_partscan_1_23.q, \
stats0.q, \
+ stats1.q, \
stats10.q, \
stats12.q, \
stats13.q, \
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java?rev=1640934&r1=1640933&r2=1640934&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java Fri Nov 21 15:17:16 2014
@@ -130,6 +130,7 @@ public class GenSparkProcContext impleme
public final Set<ReduceSinkOperator> clonedReduceSinks;
public final Set<FileSinkOperator> fileSinkSet;
+ public final Map<FileSinkOperator, List<FileSinkOperator>> fileSinkMap;
// remember which reducesinks we've already connected
public final Set<ReduceSinkOperator> connectedReduceSinks;
@@ -169,6 +170,7 @@ public class GenSparkProcContext impleme
this.workWithUnionOperators = new LinkedHashSet<BaseWork>();
this.clonedReduceSinks = new LinkedHashSet<ReduceSinkOperator>();
this.fileSinkSet = new LinkedHashSet<FileSinkOperator>();
+ this.fileSinkMap = new LinkedHashMap<FileSinkOperator, List<FileSinkOperator>>();
this.connectedReduceSinks = new LinkedHashSet<ReduceSinkOperator>();
}
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java?rev=1640934&r1=1640933&r2=1640934&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java Fri Nov 21 15:17:16 2014
@@ -25,6 +25,7 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -48,6 +49,7 @@ import org.apache.hadoop.hive.ql.plan.Re
import org.apache.hadoop.hive.ql.plan.SparkEdgeProperty;
import org.apache.hadoop.hive.ql.plan.SparkWork;
import org.apache.hadoop.hive.ql.plan.UnionWork;
+import org.apache.hadoop.hive.ql.stats.StatsFactory;
import java.util.ArrayList;
import java.util.Deque;
@@ -182,6 +184,15 @@ public class GenSparkUtils {
context.inputs, partitions, root, alias, context.conf, false);
}
+ private void collectOperators (Operator<?> op, List<Operator<?>> opList) {
+ opList.add(op);
+ for (Object child : op.getChildOperators()) {
+ if (child != null) {
+ collectOperators((Operator<?>)child, opList);
+ }
+ }
+ }
+
// removes any union operator and clones the plan
public void removeUnionOperators(Configuration conf, GenSparkProcContext context,
BaseWork work)
@@ -196,6 +207,29 @@ public class GenSparkUtils {
// need to clone the plan.
List<Operator<?>> newRoots = Utilities.cloneOperatorTree(conf, roots);
+ // Build a map from each original FileSinkOperator to its cloned FileSinkOperators.
+ // This map is used to set the stats flags on the cloned FileSinkOperators in later processing.
+ Iterator<Operator<?>> newRoots_it = newRoots.iterator();
+ for (Operator<?> root : roots) {
+ Operator<?> newRoot = newRoots_it.next();
+ List<Operator<?>> newOpQueue = new LinkedList<Operator<?>>();
+ collectOperators (newRoot, newOpQueue);
+ List<Operator<?>> opQueue = new LinkedList<Operator<?>>();
+ collectOperators (root, opQueue);
+ Iterator<Operator<?>> newOpQueue_it = newOpQueue.iterator();
+ for (Operator<?> op : opQueue) {
+ Operator<?> newOp = newOpQueue_it.next();
+ if (op instanceof FileSinkOperator) {
+ List<FileSinkOperator> fileSinkList = context.fileSinkMap.get((FileSinkOperator)op);
+ if (fileSinkList == null) {
+ fileSinkList = new LinkedList<FileSinkOperator>();
+ }
+ fileSinkList.add((FileSinkOperator)newOp);
+ context.fileSinkMap.put((FileSinkOperator)op, fileSinkList);
+ }
+ }
+ }
+
// we're cloning the operator plan but we're retaining the original work. That means
// that root operators have to be replaced with the cloned ops. The replacement map
// tells you what that mapping is.
@@ -272,8 +306,17 @@ public class GenSparkUtils {
GenMapRedUtils.isInsertInto(parseContext, fileSink);
HiveConf hconf = parseContext.getConf();
- boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask,
- hconf, fileSink, context.currentTask, isInsertTable);
+ boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask,
+ hconf, fileSink, context.currentTask, isInsertTable);
+ // Set stats config for FileSinkOperators which are cloned from the fileSink
+ List<FileSinkOperator> fileSinkList = context.fileSinkMap.get(fileSink);
+ if (fileSinkList != null) {
+ for (FileSinkOperator fsOp : fileSinkList) {
+ fsOp.getConf().setGatherStats(fileSink.getConf().isGatherStats());
+ fsOp.getConf().setStatsReliable(fileSink.getConf().isStatsReliable());
+ fsOp.getConf().setMaxStatsKeyPrefixLength(fileSink.getConf().getMaxStatsKeyPrefixLength());
+ }
+ }
Path finalName = GenMapRedUtils.createMoveTask(context.currentTask,
chDir, fileSink, parseContext, context.moveTask, hconf, context.dependencyTask);
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out?rev=1640934&r1=1640933&r2=1640934&view=diff
==============================================================================
Files hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out (original) and hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out Fri Nov 21 15:17:16 2014 differ
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out?rev=1640934&r1=1640933&r2=1640934&view=diff
==============================================================================
Files hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out (original) and hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out Fri Nov 21 15:17:16 2014 differ
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out?rev=1640934&r1=1640933&r2=1640934&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out Fri Nov 21 15:17:16 2014
@@ -448,24 +448,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key < 15) (type: boolean)
- Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int)
outputColumnNames: key
- Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: key (type: int), key (type: int)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
@@ -489,24 +489,24 @@ STAGE PLANS:
0 {VALUE._col0}
1
outputColumnNames: _col1
- Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -676,24 +676,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t3
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key > 5) (type: boolean)
- Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -703,24 +703,24 @@ STAGE PLANS:
0 {VALUE._col0}
1
outputColumnNames: _col1
- Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -992,15 +992,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -1010,24 +1010,24 @@ STAGE PLANS:
0 {KEY.reducesinkkey0}
1
outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1206,24 +1206,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int)
outputColumnNames: key
- Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: key (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE
Map 5
Map Operator Tree:
TableScan
@@ -1249,25 +1249,25 @@ STAGE PLANS:
1 {KEY.reducesinkkey0} {VALUE._col0}
2
outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: int), _col3 (type: string)
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1347,15 +1347,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int), value (type: string)
sort order: ++
Map-reduce partition columns: key (type: int), value (type: string)
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -1365,24 +1365,24 @@ STAGE PLANS:
0 {KEY.reducesinkkey0} {KEY.reducesinkkey1}
1
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1482,15 +1482,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -1502,24 +1502,24 @@ STAGE PLANS:
1
2
outputColumnNames: _col0
- Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1606,12 +1606,12 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -1623,24 +1623,24 @@ STAGE PLANS:
1
2
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1710,12 +1710,12 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
@@ -1756,24 +1756,24 @@ STAGE PLANS:
1
2
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1846,12 +1846,12 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
@@ -1892,24 +1892,24 @@ STAGE PLANS:
1
2
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -2011,12 +2011,12 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -2028,24 +2028,24 @@ STAGE PLANS:
1
2
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -2147,12 +2147,12 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -2164,24 +2164,24 @@ STAGE PLANS:
1
2
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -2285,12 +2285,12 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -2302,24 +2302,24 @@ STAGE PLANS:
1
2
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -2438,15 +2438,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE
value expressions: value (type: string)
Reducer 2
Reduce Operator Tree:
@@ -2457,12 +2457,12 @@ STAGE PLANS:
0 {KEY.reducesinkkey0} {VALUE._col0}
1
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: string)
sort order: +
Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int)
Reducer 3
Reduce Operator Tree:
@@ -2473,24 +2473,24 @@ STAGE PLANS:
0 {VALUE._col0}
1
outputColumnNames: _col0
- Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE
Reducer 4
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -2590,15 +2590,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (value is not null and (key > 100)) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: value (type: string)
sort order: +
Map-reduce partition columns: value (type: string)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: int)
Reducer 2
Reduce Operator Tree:
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/stats1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/stats1.q.out?rev=1640934&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/stats1.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/stats1.q.out Fri Nov 21 15:17:16 2014
@@ -0,0 +1,247 @@
+PREHOOK: query: create table tmptable(key string, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmptable
+POSTHOOK: query: create table tmptable(key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmptable
+PREHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE tmptable
+SELECT unionsrc.key, unionsrc.value
+FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1
+ UNION ALL
+ SELECT s2.key AS key, s2.value AS value FROM src1 s2) unionsrc
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE tmptable
+SELECT unionsrc.key, unionsrc.value
+FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1
+ UNION ALL
+ SELECT s2.key AS key, s2.value AS value FROM src1 s2) unionsrc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+ Union 3 <- Map 4 (NONE, 0), Reducer 2 (NONE, 0)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s2
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tmptable
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions: 'tst1' (type: string), UDFToString(_col0) (type: string)
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tmptable
+ Union 3
+ Vertex: Union 3
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tmptable
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+PREHOOK: query: INSERT OVERWRITE TABLE tmptable
+SELECT unionsrc.key, unionsrc.value
+FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1
+ UNION ALL
+ SELECT s2.key AS key, s2.value AS value FROM src1 s2) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@tmptable
+POSTHOOK: query: INSERT OVERWRITE TABLE tmptable
+SELECT unionsrc.key, unionsrc.value
+FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1
+ UNION ALL
+ SELECT s2.key AS key, s2.value AS value FROM src1 s2) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@tmptable
+POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT * FROM tmptable x SORT BY x.key, x.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmptable
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM tmptable x SORT BY x.key, x.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmptable
+#### A masked pattern was here ####
+
+
+
+
+ val_165
+ val_193
+ val_265
+ val_27
+ val_409
+ val_484
+128
+146 val_146
+150 val_150
+213 val_213
+224
+238 val_238
+255 val_255
+273 val_273
+278 val_278
+311 val_311
+369
+401 val_401
+406 val_406
+66 val_66
+98 val_98
+tst1 500
+PREHOOK: query: DESCRIBE FORMATTED tmptable
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tmptable
+POSTHOOK: query: DESCRIBE FORMATTED tmptable
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tmptable
+# col_name data_type comment
+
+key string
+value string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 2
+ numRows 26
+ rawDataSize 199
+ totalSize 225
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Load a file into a existing table
+-- Some stats (numFiles, totalSize) should be updated correctly
+-- Some other stats (numRows, rawDataSize) should be cleared
+load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE tmptable
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tmptable
+POSTHOOK: query: -- Load a file into a existing table
+-- Some stats (numFiles, totalSize) should be updated correctly
+-- Some other stats (numRows, rawDataSize) should be cleared
+load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE tmptable
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tmptable
+PREHOOK: query: DESCRIBE FORMATTED tmptable
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tmptable
+POSTHOOK: query: DESCRIBE FORMATTED tmptable
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tmptable
+# col_name data_type comment
+
+key string
+value string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 3
+ numRows 0
+ rawDataSize 0
+ totalSize 1583
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1