You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2013/05/21 01:36:38 UTC
svn commit: r1484623 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/
java/org/apache/hadoop/hive/ql/optimizer/physical/
test/queries/clientpositive/ test/results/clientpositive/
Author: omalley
Date: Mon May 20 23:36:37 2013
New Revision: 1484623
URL: http://svn.apache.org/r1484623
Log:
HIVE-4521 Auto join conversion fails in certain cases (Gunther Hagleitner via
omalley)
Added:
hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q
hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java?rev=1484623&r1=1484622&r2=1484623&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java Mon May 20 23:36:37 2013
@@ -303,7 +303,7 @@ abstract public class AbstractBucketJoin
// The number of files for the table should be same as number of buckets.
int bucketCount = p.getBucketCount();
- if (fileNames.size() != bucketCount) {
+ if (fileNames.size() != 0 && fileNames.size() != bucketCount) {
String msg = "The number of buckets for table " +
tbl.getTableName() + " partition " + p.getName() + " is " +
p.getBucketCount() + ", whereas the number of files is " + fileNames.size();
@@ -333,7 +333,7 @@ abstract public class AbstractBucketJoin
Integer num = new Integer(tbl.getNumBuckets());
// The number of files for the table should be same as number of buckets.
- if (fileNames.size() != num) {
+ if (fileNames.size() != 0 && fileNames.size() != num) {
String msg = "The number of buckets for table " +
tbl.getTableName() + " is " + tbl.getNumBuckets() +
", whereas the number of files is " + fileNames.size();
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java?rev=1484623&r1=1484622&r2=1484623&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java Mon May 20 23:36:37 2013
@@ -48,7 +48,7 @@ public class AvgPartitionSizeBasedBigTab
public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp)
throws SemanticException {
int bigTablePos = 0;
- long maxSize = 0;
+ long maxSize = -1;
int numPartitionsCurrentBigTable = 0; // number of partitions for the chosen big table
HiveConf conf = parseCtx.getConf();
@@ -79,7 +79,7 @@ public class AvgPartitionSizeBasedBigTab
for (Partition part : partsList.getNotDeniedPartns()) {
totalSize += getSize(conf, part);
}
- averageSize = totalSize/numPartitions;
+ averageSize = numPartitions == 0 ? 0 : totalSize/numPartitions;
}
if (averageSize > maxSize) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java?rev=1484623&r1=1484622&r2=1484623&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java Mon May 20 23:36:37 2013
@@ -41,7 +41,7 @@ implements BigTableSelectorForAutoSMJ {
public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp)
throws SemanticException {
int bigTablePos = 0;
- long maxSize = 0;
+ long maxSize = -1;
HiveConf conf = parseCtx.getConf();
try {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java?rev=1484623&r1=1484622&r2=1484623&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java Mon May 20 23:36:37 2013
@@ -466,7 +466,7 @@ public class CommonJoinTaskDispatcher ex
HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
boolean bigTableFound = false;
- long largestBigTableCandidateSize = 0;
+ long largestBigTableCandidateSize = -1;
long sumTableSizes = 0;
for (String alias : aliasToWork.keySet()) {
int tablePosition = getPosition(currWork, joinOp, alias);
Added: hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q?rev=1484623&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q Mon May 20 23:36:37 2013
@@ -0,0 +1,78 @@
+set hive.auto.convert.join=true;
+
+-- empty tables
+create table studenttab10k (name string, age int, gpa double);
+create table votertab10k (name string, age int, registration string, contributions float);
+
+explain select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name;
+
+select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name;
+
+set hive.optimize.bucketmapjoin=true;
+set hive.optimize.bucketmapjoin.sortedmerge=true;
+set hive.auto.convert.sortmerge.join=true;
+
+-- smb
+create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets;
+create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets;
+
+explain select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name;
+
+select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name;
+
+load data local inpath '../data/files/empty1.txt' into table studenttab10k_smb;
+load data local inpath '../data/files/empty2.txt' into table studenttab10k_smb;
+load data local inpath '../data/files/empty1.txt' into table votertab10k_smb;
+load data local inpath '../data/files/empty2.txt' into table votertab10k_smb;
+
+explain select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name;
+
+select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name;
+
+-- smb + partitions
+create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets;
+create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets;
+
+load data local inpath '../data/files/empty1.txt' into table studenttab10k_part partition (p='foo');
+load data local inpath '../data/files/empty2.txt' into table studenttab10k_part partition (p='foo');
+load data local inpath '../data/files/empty1.txt' into table votertab10k_part partition (p='foo');
+load data local inpath '../data/files/empty2.txt' into table votertab10k_part partition (p='foo');
+
+explain select s.name, count(distinct registration)
+from studenttab10k_part s join votertab10k_part v
+on (s.name = v.name)
+where s.p = 'bar'
+and v.p = 'bar'
+group by s.name;
+
+select s.name, count(distinct registration)
+from studenttab10k_part s join votertab10k_part v
+on (s.name = v.name)
+where s.p = 'bar'
+and v.p = 'bar'
+group by s.name;
+
+drop table studenttab10k;
+drop table votertab10k;
+drop table studenttab10k_smb;
+drop table votertab10k_smb;
+drop table studenttab10k_part;
+drop table votertab10k_part;
\ No newline at end of file
Added: hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out?rev=1484623&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out Mon May 20 23:36:37 2013
@@ -0,0 +1,633 @@
+PREHOOK: query: -- empty tables
+create table studenttab10k (name string, age int, gpa double)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- empty tables
+create table studenttab10k (name string, age int, gpa double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@studenttab10k
+PREHOOK: query: create table votertab10k (name string, age int, registration string, contributions float)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table votertab10k (name string, age int, registration string, contributions float)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@votertab10k
+PREHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k) s) (TOK_TABREF (TOK_TABNAME votertab10k) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name))))
+
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-5
+ Stage-2 depends on stages: Stage-4
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ s
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ s
+ TableScan
+ alias: s
+ HashTable Sink Operator
+ condition expressions:
+ 0 {name}
+ 1 {registration}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[name]]
+ 1 [Column[name]]
+ Position of Big Table: 1
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ v
+ TableScan
+ alias: v
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {name}
+ 1 {registration}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[name]]
+ 1 [Column[name]]
+ outputColumnNames: _col0, _col7
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col7
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT _col7)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ expr: _col7
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab10k
+PREHOOK: Input: default@votertab10k
+#### A masked pattern was here ####
+POSTHOOK: query: select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab10k
+POSTHOOK: Input: default@votertab10k
+#### A masked pattern was here ####
+PREHOOK: query: -- smb
+create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- smb
+create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@studenttab10k_smb
+PREHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@votertab10k_smb
+PREHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k_smb) s) (TOK_TABREF (TOK_TABNAME votertab10k_smb) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ s
+ TableScan
+ alias: s
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {name}
+ 1 {registration}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[name]]
+ 1 [Column[name]]
+ outputColumnNames: _col0, _col7
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col7
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT _col7)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ expr: _col7
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab10k_smb
+PREHOOK: Input: default@votertab10k_smb
+#### A masked pattern was here ####
+POSTHOOK: query: select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab10k_smb
+POSTHOOK: Input: default@votertab10k_smb
+#### A masked pattern was here ####
+PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_smb
+PREHOOK: type: LOAD
+PREHOOK: Output: default@studenttab10k_smb
+POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_smb
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@studenttab10k_smb
+PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_smb
+PREHOOK: type: LOAD
+PREHOOK: Output: default@studenttab10k_smb
+POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_smb
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@studenttab10k_smb
+PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_smb
+PREHOOK: type: LOAD
+PREHOOK: Output: default@votertab10k_smb
+POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_smb
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@votertab10k_smb
+PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_smb
+PREHOOK: type: LOAD
+PREHOOK: Output: default@votertab10k_smb
+POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_smb
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@votertab10k_smb
+PREHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k_smb) s) (TOK_TABREF (TOK_TABNAME votertab10k_smb) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ s
+ TableScan
+ alias: s
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {name}
+ 1 {registration}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[name]]
+ 1 [Column[name]]
+ outputColumnNames: _col0, _col7
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col7
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT _col7)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ expr: _col7
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab10k_smb
+PREHOOK: Input: default@votertab10k_smb
+#### A masked pattern was here ####
+POSTHOOK: query: select s.name, count(distinct registration)
+from studenttab10k_smb s join votertab10k_smb v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab10k_smb
+POSTHOOK: Input: default@votertab10k_smb
+#### A masked pattern was here ####
+PREHOOK: query: -- smb + partitions
+create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- smb + partitions
+create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@studenttab10k_part
+PREHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@votertab10k_part
+PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_part partition (p='foo')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@studenttab10k_part
+POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_part partition (p='foo')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@studenttab10k_part
+POSTHOOK: Output: default@studenttab10k_part@p=foo
+PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_part partition (p='foo')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@studenttab10k_part@p=foo
+POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_part partition (p='foo')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@studenttab10k_part@p=foo
+PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_part partition (p='foo')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@votertab10k_part
+POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_part partition (p='foo')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@votertab10k_part
+POSTHOOK: Output: default@votertab10k_part@p=foo
+PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_part partition (p='foo')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@votertab10k_part@p=foo
+POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_part partition (p='foo')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@votertab10k_part@p=foo
+PREHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k_part s join votertab10k_part v
+on (s.name = v.name)
+where s.p = 'bar'
+and v.p = 'bar'
+group by s.name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k_part s join votertab10k_part v
+on (s.name = v.name)
+where s.p = 'bar'
+and v.p = 'bar'
+group by s.name
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k_part) s) (TOK_TABREF (TOK_TABNAME votertab10k_part) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL s) p) 'bar') (= (. (TOK_TABLE_OR_COL v) p) 'bar'))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ s
+ TableScan
+ alias: s
+ Filter Operator
+ predicate:
+ expr: (p = 'bar')
+ type: boolean
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {name}
+ 1 {registration}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[name]]
+ 1 [Column[name]]
+ outputColumnNames: _col0, _col8
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col8
+ type: string
+ outputColumnNames: _col0, _col8
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT _col8)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ expr: _col8
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select s.name, count(distinct registration)
+from studenttab10k_part s join votertab10k_part v
+on (s.name = v.name)
+where s.p = 'bar'
+and v.p = 'bar'
+group by s.name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab10k_part
+PREHOOK: Input: default@votertab10k_part
+#### A masked pattern was here ####
+POSTHOOK: query: select s.name, count(distinct registration)
+from studenttab10k_part s join votertab10k_part v
+on (s.name = v.name)
+where s.p = 'bar'
+and v.p = 'bar'
+group by s.name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab10k_part
+POSTHOOK: Input: default@votertab10k_part
+#### A masked pattern was here ####
+PREHOOK: query: drop table studenttab10k
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@studenttab10k
+PREHOOK: Output: default@studenttab10k
+POSTHOOK: query: drop table studenttab10k
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@studenttab10k
+POSTHOOK: Output: default@studenttab10k
+PREHOOK: query: drop table votertab10k
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@votertab10k
+PREHOOK: Output: default@votertab10k
+POSTHOOK: query: drop table votertab10k
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@votertab10k
+POSTHOOK: Output: default@votertab10k
+PREHOOK: query: drop table studenttab10k_smb
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@studenttab10k_smb
+PREHOOK: Output: default@studenttab10k_smb
+POSTHOOK: query: drop table studenttab10k_smb
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@studenttab10k_smb
+POSTHOOK: Output: default@studenttab10k_smb
+PREHOOK: query: drop table votertab10k_smb
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@votertab10k_smb
+PREHOOK: Output: default@votertab10k_smb
+POSTHOOK: query: drop table votertab10k_smb
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@votertab10k_smb
+POSTHOOK: Output: default@votertab10k_smb
+PREHOOK: query: drop table studenttab10k_part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@studenttab10k_part
+PREHOOK: Output: default@studenttab10k_part
+POSTHOOK: query: drop table studenttab10k_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@studenttab10k_part
+POSTHOOK: Output: default@studenttab10k_part
+PREHOOK: query: drop table votertab10k_part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@votertab10k_part
+PREHOOK: Output: default@votertab10k_part
+POSTHOOK: query: drop table votertab10k_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@votertab10k_part
+POSTHOOK: Output: default@votertab10k_part