You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by th...@apache.org on 2013/09/11 23:41:30 UTC
svn commit: r1522058 - in /hive/branches/branch-0.12/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
test/queries/clientpositive/auto_join_reordering_values.q
test/results/clientpositive/auto_join_reordering_values.q.out
Author: thejas
Date: Wed Sep 11 21:41:30 2013
New Revision: 1522058
URL: http://svn.apache.org/r1522058
Log:
HIVE-5056 : MapJoinProcessor ignores order of values in removing RS (Navis Ryu via Ashutosh Chauhan)
Added:
hive/branches/branch-0.12/ql/src/test/queries/clientpositive/auto_join_reordering_values.q
hive/branches/branch-0.12/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
Modified:
hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java?rev=1522058&r1=1522057&r2=1522058&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java Wed Sep 11 21:41:30 2013
@@ -333,11 +333,8 @@ public class MapJoinProcessor implements
}
}
- RowResolver oldOutputRS = opParseCtxMap.get(op).getRowResolver();
- RowResolver outputRS = new RowResolver();
- ArrayList<String> outputColumnNames = new ArrayList<String>();
+ RowResolver outputRS = opParseCtxMap.get(op).getRowResolver();
Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
- Map<Byte, List<ExprNodeDesc>> valueExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
// Walk over all the sources (which are guaranteed to be reduce sink
// operators).
@@ -349,7 +346,6 @@ public class MapJoinProcessor implements
new ArrayList<Operator<? extends OperatorDesc>>();
List<Operator<? extends OperatorDesc>> oldReduceSinkParentOps =
new ArrayList<Operator<? extends OperatorDesc>>();
- Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
// found a source which is not to be stored in memory
if (leftSrc != null) {
@@ -385,37 +381,34 @@ public class MapJoinProcessor implements
keyExprMap.put(pos, keys);
}
- // create the map-join operator
- for (pos = 0; pos < newParentOps.size(); pos++) {
- RowResolver inputRS = opParseCtxMap.get(newParentOps.get(pos)).getRowResolver();
- List<ExprNodeDesc> values = new ArrayList<ExprNodeDesc>();
-
- Iterator<String> keysIter = inputRS.getTableNames().iterator();
- while (keysIter.hasNext()) {
- String key = keysIter.next();
- HashMap<String, ColumnInfo> rrMap = inputRS.getFieldMap(key);
- Iterator<String> fNamesIter = rrMap.keySet().iterator();
- while (fNamesIter.hasNext()) {
- String field = fNamesIter.next();
- ColumnInfo valueInfo = inputRS.get(key, field);
- ColumnInfo oldValueInfo = oldOutputRS.get(key, field);
- if (oldValueInfo == null) {
- continue;
- }
- String outputCol = oldValueInfo.getInternalName();
- if (outputRS.get(key, field) == null) {
- outputColumnNames.add(outputCol);
- ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(), valueInfo
- .getInternalName(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol());
- values.add(colDesc);
- outputRS.put(key, field, new ColumnInfo(outputCol, valueInfo.getType(), valueInfo
- .getTabAlias(), valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol()));
- colExprMap.put(outputCol, colDesc);
- }
+ // When removing the RS, only the ExprNodeDescs change (key/value/filter exprs and colExprMap);
+ // everything else (output column names, RR, schema) remains intact.
+ Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
+ List<String> outputColumnNames = op.getConf().getOutputColumnNames();
+
+ List<ColumnInfo> schema = new ArrayList<ColumnInfo>(op.getSchema().getSignature());
+
+ Map<Byte, List<ExprNodeDesc>> valueExprs = op.getConf().getExprs();
+ Map<Byte, List<ExprNodeDesc>> newValueExprs = new HashMap<Byte, List<ExprNodeDesc>>();
+ for (Map.Entry<Byte, List<ExprNodeDesc>> entry : valueExprs.entrySet()) {
+ byte tag = entry.getKey();
+ Operator<?> terminal = oldReduceSinkParentOps.get(tag);
+
+ List<ExprNodeDesc> values = entry.getValue();
+ List<ExprNodeDesc> newValues = ExprNodeDescUtils.backtrack(values, op, terminal);
+ newValueExprs.put(tag, newValues);
+ for (int i = 0; i < schema.size(); i++) {
+ ColumnInfo column = schema.get(i);
+ if (column == null) {
+ continue;
+ }
+ ExprNodeDesc expr = colExprMap.get(column.getInternalName());
+ int index = ExprNodeDescUtils.indexOf(expr, values);
+ if (index >= 0) {
+ colExprMap.put(column.getInternalName(), newValues.get(index));
+ schema.set(i, null);
}
}
-
- valueExprMap.put(Byte.valueOf((byte) pos), values);
}
Map<Byte, List<ExprNodeDesc>> filters = desc.getFilters();
@@ -456,7 +449,7 @@ public class MapJoinProcessor implements
int[][] filterMap = desc.getFilterMap();
for (pos = 0; pos < newParentOps.size(); pos++) {
- List<ExprNodeDesc> valueCols = valueExprMap.get(Byte.valueOf((byte) pos));
+ List<ExprNodeDesc> valueCols = newValueExprs.get(pos);
int length = valueCols.size();
List<ExprNodeDesc> valueFilteredCols = new ArrayList<ExprNodeDesc>(length);
// deep copy expr node desc
@@ -492,7 +485,7 @@ public class MapJoinProcessor implements
} else {
dumpFilePrefix = "mapfile"+PlanUtils.getCountForMapJoinDumpFilePrefix();
}
- MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, valueExprMap,
+ MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs,
valueTableDescs, valueFiltedTableDescs, outputColumnNames, mapJoinPos, joinCondns,
filters, op.getConf().getNoOuterJoin(), dumpFilePrefix);
mapJoinDescriptor.setTagOrder(tagOrder);
Added: hive/branches/branch-0.12/ql/src/test/queries/clientpositive/auto_join_reordering_values.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/queries/clientpositive/auto_join_reordering_values.q?rev=1522058&view=auto
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/queries/clientpositive/auto_join_reordering_values.q (added)
+++ hive/branches/branch-0.12/ql/src/test/queries/clientpositive/auto_join_reordering_values.q Wed Sep 11 21:41:30 2013
@@ -0,0 +1,31 @@
+-- HIVE-5056 RS has expression list for values, but it's ignored in MapJoinProcessor
+
+create table testsrc ( `key` int,`val` string);
+load data local inpath '../data/files/kv1.txt' overwrite into table testsrc;
+drop table if exists orderpayment_small;
+create table orderpayment_small (`dealid` int,`date` string,`time` string, `cityid` int, `userid` int);
+insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55 ,5372613 from testsrc limit 1;
+drop table if exists user_small;
+create table user_small( userid int);
+insert overwrite table user_small select key from testsrc limit 100;
+
+set hive.auto.convert.join.noconditionaltask.size = 200;
+explain extended SELECT
+ `dim_pay_date`.`date`
+ , `deal`.`dealid`
+FROM `orderpayment_small` `orderpayment`
+JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
+JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
+JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
+JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
+limit 5;
+
+SELECT
+ `dim_pay_date`.`date`
+ , `deal`.`dealid`
+FROM `orderpayment_small` `orderpayment`
+JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
+JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
+JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
+JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
+limit 5;
Added: hive/branches/branch-0.12/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out?rev=1522058&view=auto
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out (added)
+++ hive/branches/branch-0.12/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out Wed Sep 11 21:41:30 2013
@@ -0,0 +1,637 @@
+PREHOOK: query: -- HIVE-5056 RS has expression list for values, but it's ignored in MapJoinProcessor
+
+create table testsrc ( `key` int,`val` string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- HIVE-5056 RS has expression list for values, but it's ignored in MapJoinProcessor
+
+create table testsrc ( `key` int,`val` string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@testsrc
+PREHOOK: query: load data local inpath '../data/files/kv1.txt' overwrite into table testsrc
+PREHOOK: type: LOAD
+PREHOOK: Output: default@testsrc
+POSTHOOK: query: load data local inpath '../data/files/kv1.txt' overwrite into table testsrc
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@testsrc
+PREHOOK: query: drop table if exists orderpayment_small
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists orderpayment_small
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table orderpayment_small (`dealid` int,`date` string,`time` string, `cityid` int, `userid` int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table orderpayment_small (`dealid` int,`date` string,`time` string, `cityid` int, `userid` int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@orderpayment_small
+PREHOOK: query: insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55 ,5372613 from testsrc limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testsrc
+PREHOOK: Output: default@orderpayment_small
+POSTHOOK: query: insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55 ,5372613 from testsrc limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testsrc
+POSTHOOK: Output: default@orderpayment_small
+POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.date SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.time SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.userid SIMPLE []
+PREHOOK: query: drop table if exists user_small
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists user_small
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.date SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.time SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.userid SIMPLE []
+PREHOOK: query: create table user_small( userid int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table user_small( userid int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@user_small
+POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.date SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.time SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.userid SIMPLE []
+PREHOOK: query: insert overwrite table user_small select key from testsrc limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testsrc
+PREHOOK: Output: default@user_small
+POSTHOOK: query: insert overwrite table user_small select key from testsrc limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testsrc
+POSTHOOK: Output: default@user_small
+POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.date SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.time SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.userid SIMPLE []
+POSTHOOK: Lineage: user_small.userid SIMPLE [(testsrc)testsrc.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: explain extended SELECT
+ `dim_pay_date`.`date`
+ , `deal`.`dealid`
+FROM `orderpayment_small` `orderpayment`
+JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
+JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
+JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
+JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended SELECT
+ `dim_pay_date`.`date`
+ , `deal`.`dealid`
+FROM `orderpayment_small` `orderpayment`
+JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
+JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
+JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
+JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.date SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.time SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.userid SIMPLE []
+POSTHOOK: Lineage: user_small.userid SIMPLE [(testsrc)testsrc.FieldSchema(name:key, type:int, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME `orderpayment_small`) `orderpayment`) (TOK_TABREF (TOK_TABNAME `orderpayment_small`) `dim_pay_date`) (= (. (TOK_TABLE_OR_COL `dim_pay_date`) `date`) (. (TOK_TABLE_OR_COL `orderpayment`) `date`))) (TOK_TABREF (TOK_TABNAME `orderpayment_small`) `deal`) (= (. (TOK_TABLE_OR_COL `deal`) `dealid`) (. (TOK_TABLE_OR_COL `orderpayment`) `dealid`))) (TOK_TABREF (TOK_TABNAME `orderpayment_small`) `order_city`) (= (. (TOK_TABLE_OR_COL `order_city`) `cityid`) (. (TOK_TABLE_OR_COL `orderpayment`) `cityid`))) (TOK_TABREF (TOK_TABNAME `user_small`) `user`) (= (. (TOK_TABLE_OR_COL `user`) `userid`) (. (TOK_TABLE_OR_COL `orderpayment`) `userid`)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL `dim_pay_date`) `date`)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL `deal`) `dealid`))) (TOK_LIMIT 5)))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ dim_pay_date
+ TableScan
+ alias: dim_pay_date
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: date
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: date
+ type: string
+ tag: 1
+ value expressions:
+ expr: date
+ type: string
+ orderpayment
+ TableScan
+ alias: orderpayment
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: date
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: date
+ type: string
+ tag: 0
+ value expressions:
+ expr: dealid
+ type: int
+ expr: cityid
+ type: int
+ expr: userid
+ type: int
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: orderpayment_small
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns dealid,date,time,cityid,userid
+ columns.types int:string:string:int:int
+#### A masked pattern was here ####
+ name default.orderpayment_small
+ numFiles 1
+ numPartitions 0
+ numRows 1
+ rawDataSize 36
+ serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 37
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns dealid,date,time,cityid,userid
+ columns.types int:string:string:int:int
+#### A masked pattern was here ####
+ name default.orderpayment_small
+ numFiles 1
+ numPartitions 0
+ numRows 1
+ rawDataSize 36
+ serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 37
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orderpayment_small
+ name: default.orderpayment_small
+ Truncated Path -> Alias:
+ /orderpayment_small [dim_pay_date, orderpayment]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col3} {VALUE._col4}
+ 1 {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col3, _col4, _col8
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col3,_col4,_col8
+ columns.types int,int,int,string
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: 0
+ value expressions:
+ expr: _col8
+ type: string
+ expr: _col3
+ type: int
+ expr: _col4
+ type: int
+ deal
+ TableScan
+ alias: deal
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: dealid
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: dealid
+ type: int
+ tag: 1
+ value expressions:
+ expr: dealid
+ type: int
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col3,_col4,_col8
+ columns.types int,int,int,string
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col3,_col4,_col8
+ columns.types int,int,int,string
+ escape.delim \
+#### A masked pattern was here ####
+ Partition
+ base file name: orderpayment_small
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns dealid,date,time,cityid,userid
+ columns.types int:string:string:int:int
+#### A masked pattern was here ####
+ name default.orderpayment_small
+ numFiles 1
+ numPartitions 0
+ numRows 1
+ rawDataSize 36
+ serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 37
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns dealid,date,time,cityid,userid
+ columns.types int:string:string:int:int
+#### A masked pattern was here ####
+ name default.orderpayment_small
+ numFiles 1
+ numPartitions 0
+ numRows 1
+ rawDataSize 36
+ serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 37
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orderpayment_small
+ name: default.orderpayment_small
+ Truncated Path -> Alias:
+ /orderpayment_small [deal]
+#### A masked pattern was here ####
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col1} {VALUE._col10} {VALUE._col11}
+ 1 {VALUE._col0}
+ handleSkewJoin: false
+ outputColumnNames: _col1, _col10, _col11, _col14
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col1,_col10,_col11,_col14
+ columns.types string,int,int,int
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: _col10
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col10
+ type: int
+ tag: 0
+ value expressions:
+ expr: _col14
+ type: int
+ expr: _col1
+ type: string
+ expr: _col11
+ type: int
+ order_city
+ TableScan
+ alias: order_city
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: cityid
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: cityid
+ type: int
+ tag: 1
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10003
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col1,_col10,_col11,_col14
+ columns.types string,int,int,int
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col1,_col10,_col11,_col14
+ columns.types string,int,int,int
+ escape.delim \
+#### A masked pattern was here ####
+ Partition
+ base file name: orderpayment_small
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns dealid,date,time,cityid,userid
+ columns.types int:string:string:int:int
+#### A masked pattern was here ####
+ name default.orderpayment_small
+ numFiles 1
+ numPartitions 0
+ numRows 1
+ rawDataSize 36
+ serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 37
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns dealid,date,time,cityid,userid
+ columns.types int:string:string:int:int
+#### A masked pattern was here ####
+ name default.orderpayment_small
+ numFiles 1
+ numPartitions 0
+ numRows 1
+ rawDataSize 36
+ serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 37
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.orderpayment_small
+ name: default.orderpayment_small
+ Truncated Path -> Alias:
+ /orderpayment_small [order_city]
+#### A masked pattern was here ####
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col8} {VALUE._col0} {VALUE._col18}
+ 1
+ handleSkewJoin: false
+ outputColumnNames: _col1, _col7, _col18
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col1,_col7,_col18
+ columns.types string,int,int
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: _col18
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col18
+ type: int
+ tag: 0
+ value expressions:
+ expr: _col7
+ type: int
+ expr: _col1
+ type: string
+ user
+ TableScan
+ alias: user
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: userid
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: userid
+ type: int
+ tag: 1
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10004
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col1,_col7,_col18
+ columns.types string,int,int
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col1,_col7,_col18
+ columns.types string,int,int
+ escape.delim \
+#### A masked pattern was here ####
+ Partition
+ base file name: user_small
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns userid
+ columns.types int
+#### A masked pattern was here ####
+ name default.user_small
+ numFiles 1
+ numPartitions 0
+ numRows 100
+ rawDataSize 288
+ serialization.ddl struct user_small { i32 userid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 388
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns userid
+ columns.types int
+#### A masked pattern was here ####
+ name default.user_small
+ numFiles 1
+ numPartitions 0
+ numRows 100
+ rawDataSize 288
+ serialization.ddl struct user_small { i32 userid}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 388
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.user_small
+ name: default.user_small
+ Truncated Path -> Alias:
+ /user_small [user]
+#### A masked pattern was here ####
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col8} {VALUE._col0}
+ 1
+ handleSkewJoin: false
+ outputColumnNames: _col1, _col7
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ expr: _col7
+ type: int
+ outputColumnNames: _col0, _col1
+ Limit
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:int
+ escape.delim \
+ hive.serialization.extend.nesting.levels true
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 5
+
+
+PREHOOK: query: SELECT
+ `dim_pay_date`.`date`
+ , `deal`.`dealid`
+FROM `orderpayment_small` `orderpayment`
+JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
+JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
+JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
+JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orderpayment_small
+PREHOOK: Input: default@user_small
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ `dim_pay_date`.`date`
+ , `deal`.`dealid`
+FROM `orderpayment_small` `orderpayment`
+JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
+JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
+JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
+JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orderpayment_small
+POSTHOOK: Input: default@user_small
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.date SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.time SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.userid SIMPLE []
+POSTHOOK: Lineage: user_small.userid SIMPLE [(testsrc)testsrc.FieldSchema(name:key, type:int, comment:null), ]