You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by rh...@apache.org on 2013/11/18 20:29:27 UTC
svn commit: r1543120 [5/16] - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/ data/files/
ql/src/java/org/apache/hadoop/hive/ql/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/java/org/a...
Added: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out?rev=1543120&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out Mon Nov 18 19:29:24 2013
@@ -0,0 +1,1813 @@
+PREHOOK: query: create table if not exists emp_staging (
+ lastname string,
+ deptid int
+) row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table if not exists emp_staging (
+ lastname string,
+ deptid int
+) row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@emp_staging
+PREHOOK: query: create table if not exists dept_staging (
+ deptid int,
+ deptname string
+) row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table if not exists dept_staging (
+ deptid int,
+ deptname string
+) row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dept_staging
+PREHOOK: query: create table if not exists loc_staging (
+ state string,
+ locid int,
+ zip bigint,
+ year int
+) row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table if not exists loc_staging (
+ state string,
+ locid int,
+ zip bigint,
+ year int
+) row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@loc_staging
+PREHOOK: query: create table if not exists emp_orc like emp_staging
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table if not exists emp_orc like emp_staging
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@emp_orc
+PREHOOK: query: alter table emp_orc set fileformat orc
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@emp_orc
+PREHOOK: Output: default@emp_orc
+POSTHOOK: query: alter table emp_orc set fileformat orc
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@emp_orc
+POSTHOOK: Output: default@emp_orc
+PREHOOK: query: create table if not exists dept_orc like dept_staging
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table if not exists dept_orc like dept_staging
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dept_orc
+PREHOOK: query: alter table dept_orc set fileformat orc
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@dept_orc
+PREHOOK: Output: default@dept_orc
+POSTHOOK: query: alter table dept_orc set fileformat orc
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@dept_orc
+POSTHOOK: Output: default@dept_orc
+PREHOOK: query: create table loc_orc like loc_staging
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table loc_orc like loc_staging
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@loc_orc
+PREHOOK: query: alter table loc_orc set fileformat orc
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@loc_orc
+PREHOOK: Output: default@loc_orc
+POSTHOOK: query: alter table loc_orc set fileformat orc
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@loc_orc
+POSTHOOK: Output: default@loc_orc
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging
+PREHOOK: type: LOAD
+PREHOOK: Output: default@emp_staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@emp_staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dept_staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dept_staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging
+PREHOOK: type: LOAD
+PREHOOK: Output: default@loc_staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@loc_staging
+PREHOOK: query: insert overwrite table emp_orc select * from emp_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emp_staging
+PREHOOK: Output: default@emp_orc
+POSTHOOK: query: insert overwrite table emp_orc select * from emp_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emp_staging
+POSTHOOK: Output: default@emp_orc
+POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]
+PREHOOK: query: insert overwrite table dept_orc select * from dept_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dept_staging
+PREHOOK: Output: default@dept_orc
+POSTHOOK: query: insert overwrite table dept_orc select * from dept_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dept_staging
+POSTHOOK: Output: default@dept_orc
+POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ]
+POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]
+PREHOOK: query: insert overwrite table loc_orc select * from loc_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@loc_staging
+PREHOOK: Output: default@loc_orc
+POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@loc_staging
+POSTHOOK: Output: default@loc_orc
+POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ]
+POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+PREHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emp_orc
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emp_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ]
+POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+PREHOOK: query: analyze table dept_orc compute statistics for columns deptname,deptid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dept_orc
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table dept_orc compute statistics for columns deptname,deptid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dept_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ]
+POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@loc_orc
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@loc_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ]
+POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+PREHOOK: query: -- number of rows
+-- emp_orc - 6
+-- dept_orc - 4
+-- loc_orc - 8
+
+-- count distincts for relevant columns (since count distinct values are approximate, in some cases count distinct values will be greater than number of rows)
+-- emp_orc.deptid - 3
+-- emp_orc.lastname - 7
+-- dept_orc.deptid - 6
+-- dept_orc.deptname - 5
+-- loc_orc.locid - 6
+-- loc_orc.state - 7
+
+-- Expected output rows: 4
+-- Reason: #rows = (6*4)/max(3,6)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- number of rows
+-- emp_orc - 6
+-- dept_orc - 4
+-- loc_orc - 8
+
+-- count distincts for relevant columns (since count distinct values are approximate, in some cases count distinct values will be greater than number of rows)
+-- emp_orc.deptid - 3
+-- emp_orc.lastname - 7
+-- dept_orc.deptid - 6
+-- dept_orc.deptname - 5
+-- loc_orc.locid - 6
+-- loc_orc.state - 7
+
+-- Expected output rows: 4
+-- Reason: #rows = (6*4)/max(3,6)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ]
+POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ d
+ TableScan
+ alias: d
+ Statistics:
+ numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: deptid
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: deptid
+ type: int
+ Statistics:
+ numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 1
+ value expressions:
+ expr: deptid
+ type: int
+ expr: deptname
+ type: string
+ e
+ TableScan
+ alias: e
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: deptid
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: deptid
+ type: int
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 0
+ value expressions:
+ expr: lastname
+ type: string
+ expr: deptid
+ type: int
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: dept_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns deptid,deptname
+ columns.types int:string
+ field.delim |
+#### A masked pattern was here ####
+ name default.dept_orc
+ numFiles 1
+ numRows 4
+ rawDataSize 384
+ serialization.ddl struct dept_orc { i32 deptid, string deptname}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 329
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns deptid,deptname
+ columns.types int:string
+ field.delim |
+#### A masked pattern was here ####
+ name default.dept_orc
+ numFiles 1
+ numRows 4
+ rawDataSize 384
+ serialization.ddl struct dept_orc { i32 deptid, string deptname}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 329
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.dept_orc
+ name: default.dept_orc
+#### A masked pattern was here ####
+ Partition
+ base file name: emp_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns lastname,deptid
+ columns.types string:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.emp_orc
+ numFiles 1
+ numRows 6
+ rawDataSize 560
+ serialization.ddl struct emp_orc { string lastname, i32 deptid}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 349
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns lastname,deptid
+ columns.types string:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.emp_orc
+ numFiles 1
+ numRows 6
+ rawDataSize 560
+ serialization.ddl struct emp_orc { string lastname, i32 deptid}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 349
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.emp_orc
+ name: default.emp_orc
+ Truncated Path -> Alias:
+ /dept_orc [d]
+ /emp_orc [e]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Statistics:
+ numRows: 4 dataSize: 760 basicStatsState: COMPLETE colStatsState: COMPLETE
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics:
+ numRows: 4 dataSize: 760 basicStatsState: COMPLETE colStatsState: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics:
+ numRows: 4 dataSize: 760 basicStatsState: COMPLETE colStatsState: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:int:int:string
+ escape.delim \
+ hive.serialization.extend.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- 3 way join
+-- Expected output rows: 4
+-- Reason: #rows = (6*4*6)/(max(3,6)*max(6,3))
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- 3 way join
+-- Expected output rows: 4
+-- Reason: #rows = (6*4*6)/(max(3,6)*max(6,3))
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ]
+POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid))) (TOK_TABREF (TOK_TABNAME emp_orc) e1) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL e1) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ d
+ TableScan
+ alias: d
+ Statistics:
+ numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: deptid
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: deptid
+ type: int
+ Statistics:
+ numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 1
+ value expressions:
+ expr: deptid
+ type: int
+ expr: deptname
+ type: string
+ e
+ TableScan
+ alias: e
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: deptid
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: deptid
+ type: int
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 0
+ value expressions:
+ expr: lastname
+ type: string
+ expr: deptid
+ type: int
+ e1
+ TableScan
+ alias: e1
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: deptid
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: deptid
+ type: int
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 2
+ value expressions:
+ expr: lastname
+ type: string
+ expr: deptid
+ type: int
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: dept_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns deptid,deptname
+ columns.types int:string
+ field.delim |
+#### A masked pattern was here ####
+ name default.dept_orc
+ numFiles 1
+ numRows 4
+ rawDataSize 384
+ serialization.ddl struct dept_orc { i32 deptid, string deptname}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 329
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns deptid,deptname
+ columns.types int:string
+ field.delim |
+#### A masked pattern was here ####
+ name default.dept_orc
+ numFiles 1
+ numRows 4
+ rawDataSize 384
+ serialization.ddl struct dept_orc { i32 deptid, string deptname}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 329
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.dept_orc
+ name: default.dept_orc
+#### A masked pattern was here ####
+ Partition
+ base file name: emp_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns lastname,deptid
+ columns.types string:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.emp_orc
+ numFiles 1
+ numRows 6
+ rawDataSize 560
+ serialization.ddl struct emp_orc { string lastname, i32 deptid}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 349
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns lastname,deptid
+ columns.types string:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.emp_orc
+ numFiles 1
+ numRows 6
+ rawDataSize 560
+ serialization.ddl struct emp_orc { string lastname, i32 deptid}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 349
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.emp_orc
+ name: default.emp_orc
+ Truncated Path -> Alias:
+ /dept_orc [d]
+ /emp_orc [e1, e]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ 2 {VALUE._col0} {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+ Statistics:
+ numRows: 4 dataSize: 1136 basicStatsState: COMPLETE colStatsState: COMPLETE
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ expr: _col8
+ type: string
+ expr: _col9
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics:
+ numRows: 4 dataSize: 1136 basicStatsState: COMPLETE colStatsState: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics:
+ numRows: 4 dataSize: 1136 basicStatsState: COMPLETE colStatsState: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5
+ columns.types string:int:int:string:string:int
+ escape.delim \
+ hive.serialization.extend.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- Expected output rows: 5
+-- Reason: #rows = (6*4*8)/(max(3,6)*max(6,6))
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Expected output rows: 5
+-- Reason: #rows = (6*4*8)/(max(3,6)*max(6,6))
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ]
+POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid))) (TOK_TABREF (TOK_TABNAME loc_orc) l) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL l) locid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ d
+ TableScan
+ alias: d
+ Statistics:
+ numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: deptid
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: deptid
+ type: int
+ Statistics:
+ numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 1
+ value expressions:
+ expr: deptid
+ type: int
+ expr: deptname
+ type: string
+ e
+ TableScan
+ alias: e
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: deptid
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: deptid
+ type: int
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 0
+ value expressions:
+ expr: lastname
+ type: string
+ expr: deptid
+ type: int
+ l
+ TableScan
+ alias: l
+ Statistics:
+ numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: locid
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: locid
+ type: int
+ Statistics:
+ numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 2
+ value expressions:
+ expr: state
+ type: string
+ expr: locid
+ type: int
+ expr: zip
+ type: bigint
+ expr: year
+ type: int
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: dept_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns deptid,deptname
+ columns.types int:string
+ field.delim |
+#### A masked pattern was here ####
+ name default.dept_orc
+ numFiles 1
+ numRows 4
+ rawDataSize 384
+ serialization.ddl struct dept_orc { i32 deptid, string deptname}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 329
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns deptid,deptname
+ columns.types int:string
+ field.delim |
+#### A masked pattern was here ####
+ name default.dept_orc
+ numFiles 1
+ numRows 4
+ rawDataSize 384
+ serialization.ddl struct dept_orc { i32 deptid, string deptname}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 329
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.dept_orc
+ name: default.dept_orc
+#### A masked pattern was here ####
+ Partition
+ base file name: emp_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns lastname,deptid
+ columns.types string:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.emp_orc
+ numFiles 1
+ numRows 6
+ rawDataSize 560
+ serialization.ddl struct emp_orc { string lastname, i32 deptid}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 349
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns lastname,deptid
+ columns.types string:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.emp_orc
+ numFiles 1
+ numRows 6
+ rawDataSize 560
+ serialization.ddl struct emp_orc { string lastname, i32 deptid}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 349
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.emp_orc
+ name: default.emp_orc
+#### A masked pattern was here ####
+ Partition
+ base file name: loc_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid,zip,year
+ columns.types string:int:bigint:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.loc_orc
+ numFiles 1
+ numRows 8
+ rawDataSize 796
+ serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 489
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid,zip,year
+ columns.types string:int:bigint:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.loc_orc
+ numFiles 1
+ numRows 8
+ rawDataSize 796
+ serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 489
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc
+ name: default.loc_orc
+ Truncated Path -> Alias:
+ /dept_orc [d]
+ /emp_orc [e]
+ /loc_orc [l]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11
+ Statistics:
+ numRows: 5 dataSize: 1449 basicStatsState: COMPLETE colStatsState: COMPLETE
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ expr: _col8
+ type: string
+ expr: _col9
+ type: int
+ expr: _col10
+ type: bigint
+ expr: _col11
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics:
+ numRows: 5 dataSize: 1449 basicStatsState: COMPLETE colStatsState: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics:
+ numRows: 5 dataSize: 1449 basicStatsState: COMPLETE colStatsState: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+ columns.types string:int:int:string:string:int:bigint:int
+ escape.delim \
+ hive.serialization.extend.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- join keys of different types
+-- Expected output rows: 4
+-- Reason: #rows = (6*4*8)/max(3,6)*max(6,7)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- join keys of different types
+-- Expected output rows: 4
+-- Reason: #rows = (6*4*8)/max(3,6)*max(6,7)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ]
+POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid))) (TOK_TABREF (TOK_TABNAME loc_orc) l) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL l) state)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ d
+ TableScan
+ alias: d
+ Statistics:
+ numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: UDFToDouble(deptid)
+ type: double
+ sort order: +
+ Map-reduce partition columns:
+ expr: UDFToDouble(deptid)
+ type: double
+ Statistics:
+ numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 1
+ value expressions:
+ expr: deptid
+ type: int
+ expr: deptname
+ type: string
+ e
+ TableScan
+ alias: e
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: UDFToDouble(deptid)
+ type: double
+ sort order: +
+ Map-reduce partition columns:
+ expr: UDFToDouble(deptid)
+ type: double
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 0
+ value expressions:
+ expr: lastname
+ type: string
+ expr: deptid
+ type: int
+ l
+ TableScan
+ alias: l
+ Statistics:
+ numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: UDFToDouble(state)
+ type: double
+ sort order: +
+ Map-reduce partition columns:
+ expr: UDFToDouble(state)
+ type: double
+ Statistics:
+ numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 2
+ value expressions:
+ expr: state
+ type: string
+ expr: locid
+ type: int
+ expr: zip
+ type: bigint
+ expr: year
+ type: int
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: dept_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns deptid,deptname
+ columns.types int:string
+ field.delim |
+#### A masked pattern was here ####
+ name default.dept_orc
+ numFiles 1
+ numRows 4
+ rawDataSize 384
+ serialization.ddl struct dept_orc { i32 deptid, string deptname}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 329
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns deptid,deptname
+ columns.types int:string
+ field.delim |
+#### A masked pattern was here ####
+ name default.dept_orc
+ numFiles 1
+ numRows 4
+ rawDataSize 384
+ serialization.ddl struct dept_orc { i32 deptid, string deptname}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 329
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.dept_orc
+ name: default.dept_orc
+#### A masked pattern was here ####
+ Partition
+ base file name: emp_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns lastname,deptid
+ columns.types string:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.emp_orc
+ numFiles 1
+ numRows 6
+ rawDataSize 560
+ serialization.ddl struct emp_orc { string lastname, i32 deptid}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 349
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns lastname,deptid
+ columns.types string:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.emp_orc
+ numFiles 1
+ numRows 6
+ rawDataSize 560
+ serialization.ddl struct emp_orc { string lastname, i32 deptid}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 349
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.emp_orc
+ name: default.emp_orc
+#### A masked pattern was here ####
+ Partition
+ base file name: loc_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid,zip,year
+ columns.types string:int:bigint:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.loc_orc
+ numFiles 1
+ numRows 8
+ rawDataSize 796
+ serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 489
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid,zip,year
+ columns.types string:int:bigint:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.loc_orc
+ numFiles 1
+ numRows 8
+ rawDataSize 796
+ serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 489
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc
+ name: default.loc_orc
+ Truncated Path -> Alias:
+ /dept_orc [d]
+ /emp_orc [e]
+ /loc_orc [l]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11
+ Statistics:
+ numRows: 4 dataSize: 1156 basicStatsState: COMPLETE colStatsState: COMPLETE
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ expr: _col8
+ type: string
+ expr: _col9
+ type: int
+ expr: _col10
+ type: bigint
+ expr: _col11
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics:
+ numRows: 4 dataSize: 1156 basicStatsState: COMPLETE colStatsState: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics:
+ numRows: 4 dataSize: 1156 basicStatsState: COMPLETE colStatsState: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+ columns.types string:int:int:string:string:int:bigint:int
+ escape.delim \
+ hive.serialization.extend.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- multi-attribute join
+-- Expected output rows: 0
+-- Reason: #rows = (6*4)/max(3,6)*max(7,5)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- multi-attribute join
+-- Expected output rows: 0
+-- Reason: #rows = (6*4)/max(3,6)*max(7,5)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ]
+POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (and (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)) (= (. (TOK_TABLE_OR_COL e) lastname) (. (TOK_TABLE_OR_COL d) deptname))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ d
+ TableScan
+ alias: d
+ Statistics:
+ numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: deptid
+ type: int
+ expr: deptname
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: deptid
+ type: int
+ expr: deptname
+ type: string
+ Statistics:
+ numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 1
+ value expressions:
+ expr: deptid
+ type: int
+ expr: deptname
+ type: string
+ e
+ TableScan
+ alias: e
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: deptid
+ type: int
+ expr: lastname
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: deptid
+ type: int
+ expr: lastname
+ type: string
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 0
+ value expressions:
+ expr: lastname
+ type: string
+ expr: deptid
+ type: int
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: dept_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns deptid,deptname
+ columns.types int:string
+ field.delim |
+#### A masked pattern was here ####
+ name default.dept_orc
+ numFiles 1
+ numRows 4
+ rawDataSize 384
+ serialization.ddl struct dept_orc { i32 deptid, string deptname}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 329
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns deptid,deptname
+ columns.types int:string
+ field.delim |
+#### A masked pattern was here ####
+ name default.dept_orc
+ numFiles 1
+ numRows 4
+ rawDataSize 384
+ serialization.ddl struct dept_orc { i32 deptid, string deptname}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 329
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.dept_orc
+ name: default.dept_orc
+#### A masked pattern was here ####
+ Partition
+ base file name: emp_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns lastname,deptid
+ columns.types string:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.emp_orc
+ numFiles 1
+ numRows 6
+ rawDataSize 560
+ serialization.ddl struct emp_orc { string lastname, i32 deptid}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 349
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns lastname,deptid
+ columns.types string:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.emp_orc
+ numFiles 1
+ numRows 6
+ rawDataSize 560
+ serialization.ddl struct emp_orc { string lastname, i32 deptid}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 349
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.emp_orc
+ name: default.emp_orc
+ Truncated Path -> Alias:
+ /dept_orc [d]
+ /emp_orc [e]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Statistics:
+ numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics:
+ numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics:
+ numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:int:int:string
+ escape.delim \
+ hive.serialization.extend.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- 3 way and multi-attribute join
+-- Expected output rows: 0
+-- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- 3 way and multi-attribute join
+-- Expected output rows: 0
+-- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ]
+POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ]
+POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (and (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)) (= (. (TOK_TABLE_OR_COL e) lastname) (. (TOK_TABLE_OR_COL d) deptname)))) (TOK_TABREF (TOK_TABNAME loc_orc) l) (and (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL l) locid)) (= (. (TOK_TABLE_OR_COL e) lastname) (. (TOK_TABLE_OR_COL l) state))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ d
+ TableScan
+ alias: d
+ Statistics:
+ numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: deptid
+ type: int
+ expr: deptname
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: deptid
+ type: int
+ expr: deptname
+ type: string
+ Statistics:
+ numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 1
+ value expressions:
+ expr: deptid
+ type: int
+ expr: deptname
+ type: string
+ e
+ TableScan
+ alias: e
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: deptid
+ type: int
+ expr: lastname
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: deptid
+ type: int
+ expr: lastname
+ type: string
+ Statistics:
+ numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 0
+ value expressions:
+ expr: lastname
+ type: string
+ expr: deptid
+ type: int
+ l
+ TableScan
+ alias: l
+ Statistics:
+ numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: locid
+ type: int
+ expr: state
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: locid
+ type: int
+ expr: state
+ type: string
+ Statistics:
+ numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE
+ tag: 2
+ value expressions:
+ expr: state
+ type: string
+ expr: locid
+ type: int
+ expr: zip
+ type: bigint
+ expr: year
+ type: int
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: dept_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns deptid,deptname
+ columns.types int:string
+ field.delim |
+#### A masked pattern was here ####
+ name default.dept_orc
+ numFiles 1
+ numRows 4
+ rawDataSize 384
+ serialization.ddl struct dept_orc { i32 deptid, string deptname}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 329
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns deptid,deptname
+ columns.types int:string
+ field.delim |
+#### A masked pattern was here ####
+ name default.dept_orc
+ numFiles 1
+ numRows 4
+ rawDataSize 384
+ serialization.ddl struct dept_orc { i32 deptid, string deptname}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 329
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.dept_orc
+ name: default.dept_orc
+#### A masked pattern was here ####
+ Partition
+ base file name: emp_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns lastname,deptid
+ columns.types string:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.emp_orc
+ numFiles 1
+ numRows 6
+ rawDataSize 560
+ serialization.ddl struct emp_orc { string lastname, i32 deptid}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 349
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns lastname,deptid
+ columns.types string:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.emp_orc
+ numFiles 1
+ numRows 6
+ rawDataSize 560
+ serialization.ddl struct emp_orc { string lastname, i32 deptid}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 349
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.emp_orc
+ name: default.emp_orc
+#### A masked pattern was here ####
+ Partition
+ base file name: loc_orc
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid,zip,year
+ columns.types string:int:bigint:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.loc_orc
+ numFiles 1
+ numRows 8
+ rawDataSize 796
+ serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 489
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns state,locid,zip,year
+ columns.types string:int:bigint:int
+ field.delim |
+#### A masked pattern was here ####
+ name default.loc_orc
+ numFiles 1
+ numRows 8
+ rawDataSize 796
+ serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+ serialization.format |
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 489
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.loc_orc
+ name: default.loc_orc
+ Truncated Path -> Alias:
+ /dept_orc [d]
+ /emp_orc [e]
+ /loc_orc [l]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11
+ Statistics:
+ numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ expr: _col8
+ type: string
+ expr: _col9
+ type: int
+ expr: _col10
+ type: bigint
+ expr: _col11
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics:
+ numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics:
+ numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+ columns.types string:int:int:string:string:int:bigint:int
+ escape.delim \
+ hive.serialization.extend.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+