You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by th...@apache.org on 2013/09/11 23:41:30 UTC
svn commit: r1522058 - in /hive/branches/branch-0.12/ql/src: java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java test/queries/clientpositive/auto_join_reordering_values.q test/results/clientpositive/auto_join_reordering_values.q.out

Author: thejas
Date: Wed Sep 11 21:41:30 2013
New Revision: 1522058

URL: http://svn.apache.org/r1522058
Log:
HIVE-5056 : MapJoinProcessor ignores order of values in removing RS (Navis Ryu via Ashutosh Chauhan)

Added:
    hive/branches/branch-0.12/ql/src/test/queries/clientpositive/auto_join_reordering_values.q
    hive/branches/branch-0.12/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
Modified:
    hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java

Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java?rev=1522058&r1=1522057&r2=1522058&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java Wed Sep 11 21:41:30 2013
@@ -333,11 +333,8 @@ public class MapJoinProcessor implements
       }
     }
 
-    RowResolver oldOutputRS = opParseCtxMap.get(op).getRowResolver();
-    RowResolver outputRS = new RowResolver();
-    ArrayList<String> outputColumnNames = new ArrayList<String>();
+    RowResolver outputRS = opParseCtxMap.get(op).getRowResolver();
     Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
-    Map<Byte, List<ExprNodeDesc>> valueExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
 
     // Walk over all the sources (which are guaranteed to be reduce sink
     // operators).
@@ -349,7 +346,6 @@ public class MapJoinProcessor implements
       new ArrayList<Operator<? extends OperatorDesc>>();
     List<Operator<? extends OperatorDesc>> oldReduceSinkParentOps =
        new ArrayList<Operator<? extends OperatorDesc>>();
-    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
 
     // found a source which is not to be stored in memory
     if (leftSrc != null) {
@@ -385,37 +381,34 @@ public class MapJoinProcessor implements
       keyExprMap.put(pos, keys);
     }
 
-    // create the map-join operator
-    for (pos = 0; pos < newParentOps.size(); pos++) {
-      RowResolver inputRS = opParseCtxMap.get(newParentOps.get(pos)).getRowResolver();
-      List<ExprNodeDesc> values = new ArrayList<ExprNodeDesc>();
-
-      Iterator<String> keysIter = inputRS.getTableNames().iterator();
-      while (keysIter.hasNext()) {
-        String key = keysIter.next();
-        HashMap<String, ColumnInfo> rrMap = inputRS.getFieldMap(key);
-        Iterator<String> fNamesIter = rrMap.keySet().iterator();
-        while (fNamesIter.hasNext()) {
-          String field = fNamesIter.next();
-          ColumnInfo valueInfo = inputRS.get(key, field);
-          ColumnInfo oldValueInfo = oldOutputRS.get(key, field);
-          if (oldValueInfo == null) {
-            continue;
-          }
-          String outputCol = oldValueInfo.getInternalName();
-          if (outputRS.get(key, field) == null) {
-            outputColumnNames.add(outputCol);
-            ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(), valueInfo
-                .getInternalName(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol());
-            values.add(colDesc);
-            outputRS.put(key, field, new ColumnInfo(outputCol, valueInfo.getType(), valueInfo
-                .getTabAlias(), valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol()));
-            colExprMap.put(outputCol, colDesc);
-          }
+    // removing RS, only ExprNodeDesc is changed (key/value/filter exprs and colExprMap)
+    // others (output column-name, RR, schema) remain intact
+    Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
+    List<String> outputColumnNames = op.getConf().getOutputColumnNames();
+
+    List<ColumnInfo> schema = new ArrayList<ColumnInfo>(op.getSchema().getSignature());
+
+    Map<Byte, List<ExprNodeDesc>> valueExprs = op.getConf().getExprs();
+    Map<Byte, List<ExprNodeDesc>> newValueExprs = new HashMap<Byte, List<ExprNodeDesc>>();
+    for (Map.Entry<Byte, List<ExprNodeDesc>> entry : valueExprs.entrySet()) {
+      byte tag = entry.getKey();
+      Operator<?> terminal = oldReduceSinkParentOps.get(tag);
+
+      List<ExprNodeDesc> values = entry.getValue();
+      List<ExprNodeDesc> newValues = ExprNodeDescUtils.backtrack(values, op, terminal);
+      newValueExprs.put(tag, newValues);
+      for (int i = 0; i < schema.size(); i++) {
+        ColumnInfo column = schema.get(i);
+        if (column == null) {
+          continue;
+        }
+        ExprNodeDesc expr = colExprMap.get(column.getInternalName());
+        int index = ExprNodeDescUtils.indexOf(expr, values);
+        if (index >= 0) {
+          colExprMap.put(column.getInternalName(), newValues.get(index));
+          schema.set(i, null);
         }
       }
-
-      valueExprMap.put(Byte.valueOf((byte) pos), values);
     }
 
     Map<Byte, List<ExprNodeDesc>> filters = desc.getFilters();
@@ -456,7 +449,7 @@ public class MapJoinProcessor implements
 
     int[][] filterMap = desc.getFilterMap();
     for (pos = 0; pos < newParentOps.size(); pos++) {
-      List<ExprNodeDesc> valueCols = valueExprMap.get(Byte.valueOf((byte) pos));
+      List<ExprNodeDesc> valueCols = newValueExprs.get(pos);
       int length = valueCols.size();
       List<ExprNodeDesc> valueFilteredCols = new ArrayList<ExprNodeDesc>(length);
       // deep copy expr node desc
@@ -492,7 +485,7 @@ public class MapJoinProcessor implements
     } else {
       dumpFilePrefix = "mapfile"+PlanUtils.getCountForMapJoinDumpFilePrefix();
     }
-    MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, valueExprMap,
+    MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs,
         valueTableDescs, valueFiltedTableDescs, outputColumnNames, mapJoinPos, joinCondns,
         filters, op.getConf().getNoOuterJoin(), dumpFilePrefix);
     mapJoinDescriptor.setTagOrder(tagOrder);

Added: hive/branches/branch-0.12/ql/src/test/queries/clientpositive/auto_join_reordering_values.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/queries/clientpositive/auto_join_reordering_values.q?rev=1522058&view=auto
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/queries/clientpositive/auto_join_reordering_values.q (added)
+++ hive/branches/branch-0.12/ql/src/test/queries/clientpositive/auto_join_reordering_values.q Wed Sep 11 21:41:30 2013
@@ -0,0 +1,31 @@
+-- HIVE-5056 RS has expression list for values, but it's ignored in MapJoinProcessor
+
+create table testsrc ( `key` int,`val` string);
+load data local inpath '../data/files/kv1.txt' overwrite into table testsrc;
+drop table if exists orderpayment_small;
+create table orderpayment_small (`dealid` int,`date` string,`time` string, `cityid` int, `userid` int);
+insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55 ,5372613 from testsrc limit 1;
+drop table if exists user_small;
+create table user_small( userid int);
+insert overwrite table user_small select key from testsrc limit 100;
+
+set hive.auto.convert.join.noconditionaltask.size = 200;
+explain extended SELECT
+     `dim_pay_date`.`date`
+    , `deal`.`dealid`
+FROM `orderpayment_small` `orderpayment`
+JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
+JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
+JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
+JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
+limit 5;
+
+SELECT
+     `dim_pay_date`.`date`
+    , `deal`.`dealid`
+FROM `orderpayment_small` `orderpayment`
+JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
+JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
+JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
+JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
+limit 5;

Added: hive/branches/branch-0.12/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out?rev=1522058&view=auto
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out (added)
+++ hive/branches/branch-0.12/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out Wed Sep 11 21:41:30 2013
@@ -0,0 +1,637 @@
+PREHOOK: query: -- HIVE-5056 RS has expression list for values, but it's ignored in MapJoinProcessor
+
+create table testsrc ( `key` int,`val` string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- HIVE-5056 RS has expression list for values, but it's ignored in MapJoinProcessor
+
+create table testsrc ( `key` int,`val` string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@testsrc
+PREHOOK: query: load data local inpath '../data/files/kv1.txt' overwrite into table testsrc
+PREHOOK: type: LOAD
+PREHOOK: Output: default@testsrc
+POSTHOOK: query: load data local inpath '../data/files/kv1.txt' overwrite into table testsrc
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@testsrc
+PREHOOK: query: drop table if exists orderpayment_small
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists orderpayment_small
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table orderpayment_small (`dealid` int,`date` string,`time` string, `cityid` int, `userid` int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table orderpayment_small (`dealid` int,`date` string,`time` string, `cityid` int, `userid` int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@orderpayment_small
+PREHOOK: query: insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55 ,5372613 from testsrc limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testsrc
+PREHOOK: Output: default@orderpayment_small
+POSTHOOK: query: insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55 ,5372613 from testsrc limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testsrc
+POSTHOOK: Output: default@orderpayment_small
+POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.date SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.time SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.userid SIMPLE []
+PREHOOK: query: drop table if exists user_small
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists user_small
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.date SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.time SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.userid SIMPLE []
+PREHOOK: query: create table user_small( userid int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table user_small( userid int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@user_small
+POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.date SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.time SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.userid SIMPLE []
+PREHOOK: query: insert overwrite table user_small select key from testsrc limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testsrc
+PREHOOK: Output: default@user_small
+POSTHOOK: query: insert overwrite table user_small select key from testsrc limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testsrc
+POSTHOOK: Output: default@user_small
+POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.date SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.time SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.userid SIMPLE []
+POSTHOOK: Lineage: user_small.userid SIMPLE [(testsrc)testsrc.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: explain extended SELECT
+     `dim_pay_date`.`date`
+    , `deal`.`dealid`
+FROM `orderpayment_small` `orderpayment`
+JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
+JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
+JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
+JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended SELECT
+     `dim_pay_date`.`date`
+    , `deal`.`dealid`
+FROM `orderpayment_small` `orderpayment`
+JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
+JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
+JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
+JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.date SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.time SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.userid SIMPLE []
+POSTHOOK: Lineage: user_small.userid SIMPLE [(testsrc)testsrc.FieldSchema(name:key, type:int, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME `orderpayment_small`) `orderpayment`) (TOK_TABREF (TOK_TABNAME `orderpayment_small`) `dim_pay_date`) (= (. (TOK_TABLE_OR_COL `dim_pay_date`) `date`) (. (TOK_TABLE_OR_COL `orderpayment`) `date`))) (TOK_TABREF (TOK_TABNAME `orderpayment_small`) `deal`) (= (. (TOK_TABLE_OR_COL `deal`) `dealid`) (. (TOK_TABLE_OR_COL `orderpayment`) `dealid`))) (TOK_TABREF (TOK_TABNAME `orderpayment_small`) `order_city`) (= (. (TOK_TABLE_OR_COL `order_city`) `cityid`) (. (TOK_TABLE_OR_COL `orderpayment`) `cityid`))) (TOK_TABREF (TOK_TABNAME `user_small`) `user`) (= (. (TOK_TABLE_OR_COL `user`) `userid`) (. (TOK_TABLE_OR_COL `orderpayment`) `userid`)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL `dim_pay_date`) `date`)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL `deal`) `dealid`))) (TOK_LIMIT 5)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-3
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        dim_pay_date 
+          TableScan
+            alias: dim_pay_date
+            GatherStats: false
+            Reduce Output Operator
+              key expressions:
+                    expr: date
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: date
+                    type: string
+              tag: 1
+              value expressions:
+                    expr: date
+                    type: string
+        orderpayment 
+          TableScan
+            alias: orderpayment
+            GatherStats: false
+            Reduce Output Operator
+              key expressions:
+                    expr: date
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: date
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: dealid
+                    type: int
+                    expr: cityid
+                    type: int
+                    expr: userid
+                    type: int
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: orderpayment_small
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns dealid,date,time,cityid,userid
+              columns.types int:string:string:int:int
+#### A masked pattern was here ####
+              name default.orderpayment_small
+              numFiles 1
+              numPartitions 0
+              numRows 1
+              rawDataSize 36
+              serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 37
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns dealid,date,time,cityid,userid
+                columns.types int:string:string:int:int
+#### A masked pattern was here ####
+                name default.orderpayment_small
+                numFiles 1
+                numPartitions 0
+                numRows 1
+                rawDataSize 36
+                serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 37
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.orderpayment_small
+            name: default.orderpayment_small
+      Truncated Path -> Alias:
+        /orderpayment_small [dim_pay_date, orderpayment]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col3} {VALUE._col4}
+            1 {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col3, _col4, _col8
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col3,_col4,_col8
+                  columns.types int,int,int,string
+                  escape.delim \
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        $INTNAME 
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: int
+              tag: 0
+              value expressions:
+                    expr: _col8
+                    type: string
+                    expr: _col3
+                    type: int
+                    expr: _col4
+                    type: int
+        deal 
+          TableScan
+            alias: deal
+            GatherStats: false
+            Reduce Output Operator
+              key expressions:
+                    expr: dealid
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: dealid
+                    type: int
+              tag: 1
+              value expressions:
+                    expr: dealid
+                    type: int
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col3,_col4,_col8
+              columns.types int,int,int,string
+              escape.delim \
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col3,_col4,_col8
+                columns.types int,int,int,string
+                escape.delim \
+#### A masked pattern was here ####
+          Partition
+            base file name: orderpayment_small
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns dealid,date,time,cityid,userid
+              columns.types int:string:string:int:int
+#### A masked pattern was here ####
+              name default.orderpayment_small
+              numFiles 1
+              numPartitions 0
+              numRows 1
+              rawDataSize 36
+              serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 37
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns dealid,date,time,cityid,userid
+                columns.types int:string:string:int:int
+#### A masked pattern was here ####
+                name default.orderpayment_small
+                numFiles 1
+                numPartitions 0
+                numRows 1
+                rawDataSize 36
+                serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 37
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.orderpayment_small
+            name: default.orderpayment_small
+      Truncated Path -> Alias:
+        /orderpayment_small [deal]
+#### A masked pattern was here ####
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col1} {VALUE._col10} {VALUE._col11}
+            1 {VALUE._col0}
+          handleSkewJoin: false
+          outputColumnNames: _col1, _col10, _col11, _col14
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col1,_col10,_col11,_col14
+                  columns.types string,int,int,int
+                  escape.delim \
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        $INTNAME 
+            Reduce Output Operator
+              key expressions:
+                    expr: _col10
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col10
+                    type: int
+              tag: 0
+              value expressions:
+                    expr: _col14
+                    type: int
+                    expr: _col1
+                    type: string
+                    expr: _col11
+                    type: int
+        order_city 
+          TableScan
+            alias: order_city
+            GatherStats: false
+            Reduce Output Operator
+              key expressions:
+                    expr: cityid
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: cityid
+                    type: int
+              tag: 1
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col1,_col10,_col11,_col14
+              columns.types string,int,int,int
+              escape.delim \
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col1,_col10,_col11,_col14
+                columns.types string,int,int,int
+                escape.delim \
+#### A masked pattern was here ####
+          Partition
+            base file name: orderpayment_small
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns dealid,date,time,cityid,userid
+              columns.types int:string:string:int:int
+#### A masked pattern was here ####
+              name default.orderpayment_small
+              numFiles 1
+              numPartitions 0
+              numRows 1
+              rawDataSize 36
+              serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 37
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns dealid,date,time,cityid,userid
+                columns.types int:string:string:int:int
+#### A masked pattern was here ####
+                name default.orderpayment_small
+                numFiles 1
+                numPartitions 0
+                numRows 1
+                rawDataSize 36
+                serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 37
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.orderpayment_small
+            name: default.orderpayment_small
+      Truncated Path -> Alias:
+        /orderpayment_small [order_city]
+#### A masked pattern was here ####
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col8} {VALUE._col0} {VALUE._col18}
+            1 
+          handleSkewJoin: false
+          outputColumnNames: _col1, _col7, _col18
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col1,_col7,_col18
+                  columns.types string,int,int
+                  escape.delim \
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        $INTNAME 
+            Reduce Output Operator
+              key expressions:
+                    expr: _col18
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col18
+                    type: int
+              tag: 0
+              value expressions:
+                    expr: _col7
+                    type: int
+                    expr: _col1
+                    type: string
+        user 
+          TableScan
+            alias: user
+            GatherStats: false
+            Reduce Output Operator
+              key expressions:
+                    expr: userid
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: userid
+                    type: int
+              tag: 1
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10004
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col1,_col7,_col18
+              columns.types string,int,int
+              escape.delim \
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col1,_col7,_col18
+                columns.types string,int,int
+                escape.delim \
+#### A masked pattern was here ####
+          Partition
+            base file name: user_small
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns userid
+              columns.types int
+#### A masked pattern was here ####
+              name default.user_small
+              numFiles 1
+              numPartitions 0
+              numRows 100
+              rawDataSize 288
+              serialization.ddl struct user_small { i32 userid}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 388
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns userid
+                columns.types int
+#### A masked pattern was here ####
+                name default.user_small
+                numFiles 1
+                numPartitions 0
+                numRows 100
+                rawDataSize 288
+                serialization.ddl struct user_small { i32 userid}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 388
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.user_small
+            name: default.user_small
+      Truncated Path -> Alias:
+        /user_small [user]
+#### A masked pattern was here ####
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col8} {VALUE._col0}
+            1 
+          handleSkewJoin: false
+          outputColumnNames: _col1, _col7
+          Select Operator
+            expressions:
+                  expr: _col1
+                  type: string
+                  expr: _col7
+                  type: int
+            outputColumnNames: _col0, _col1
+            Limit
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      columns _col0,_col1
+                      columns.types string:int
+                      escape.delim \
+                      hive.serialization.extend.nesting.levels true
+                      serialization.format 1
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+
+
+PREHOOK: query: SELECT
+     `dim_pay_date`.`date`
+    , `deal`.`dealid`
+FROM `orderpayment_small` `orderpayment`
+JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
+JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
+JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
+JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orderpayment_small
+PREHOOK: Input: default@user_small
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+     `dim_pay_date`.`date`
+    , `deal`.`dealid`
+FROM `orderpayment_small` `orderpayment`
+JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date`
+JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid`
+JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid`
+JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid`
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orderpayment_small
+POSTHOOK: Input: default@user_small
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.date SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.time SIMPLE []
+POSTHOOK: Lineage: orderpayment_small.userid SIMPLE []
+POSTHOOK: Lineage: user_small.userid SIMPLE [(testsrc)testsrc.FieldSchema(name:key, type:int, comment:null), ]