You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/11/02 12:20:29 UTC
svn commit: r1404924 [4/6] - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/common/
common/src/java/org/apache/hadoop/hive/conf/ conf/
ql/src/java/org/apache/hadoop/hive/ql/
ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/had...
Added: hive/trunk/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out?rev=1404924&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out Fri Nov 2 11:20:26 2012
@@ -0,0 +1,746 @@
+PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- List bucketing query logic test case. We simulate the directory structure by DML here.
+-- Test condition:
+-- 1. where clause has multiple skewed columns
+-- 2. where clause doesn't have non-skewed column
+-- 3. where clause has one and operator
+-- Test focus:
+-- 1. basic list bucketing query work
+-- Test result:
+-- 1. pruner only pick up right directory
+-- 2. query result is right
+
+-- create 1 table: fact_daily
+-- 1. create a few partitions
+-- 2. dfs move partition according to list bucketing structure (simulate DML)
+-- $/fact_daily/ds=1/hr=4/x=../y=..
+-- notes: waste all partitions except ds=1 and hr=4 for list bucketing query test
+-- 3. alter it to skewed table and set up location map
+-- 4. list bucketing query
+-- fact_daily (ds=1 and hr=4) will be used for list bucketing query
+CREATE TABLE fact_daily(x int, y STRING) PARTITIONED BY (ds STRING, hr STRING)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- List bucketing query logic test case. We simulate the directory structure by DML here.
+-- Test condition:
+-- 1. where clause has multiple skewed columns
+-- 2. where clause doesn't have non-skewed column
+-- 3. where clause has one and operator
+-- Test focus:
+-- 1. basic list bucketing query work
+-- Test result:
+-- 1. pruner only pick up right directory
+-- 2. query result is right
+
+-- create 1 table: fact_daily
+-- 1. create a few partitions
+-- 2. dfs move partition according to list bucketing structure (simulate DML)
+-- $/fact_daily/ds=1/hr=4/x=../y=..
+-- notes: waste all partitions except ds=1 and hr=4 for list bucketing query test
+-- 3. alter it to skewed table and set up location map
+-- 4. list bucketing query
+-- fact_daily (ds=1 and hr=4) will be used for list bucketing query
+CREATE TABLE fact_daily(x int, y STRING) PARTITIONED BY (ds STRING, hr STRING)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@fact_daily
+PREHOOK: query: -- create /fact_daily/ds=1/hr=1 directory
+INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='1')
+SELECT key, value FROM src WHERE key=484
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@fact_daily@ds=1/hr=1
+POSTHOOK: query: -- create /fact_daily/ds=1/hr=1 directory
+INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='1')
+SELECT key, value FROM src WHERE key=484
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@fact_daily@ds=1/hr=1
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- create /fact_daily/ds=1/hr=2 directory
+INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='2')
+SELECT key, value FROM src WHERE key=369 or key=406
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@fact_daily@ds=1/hr=2
+POSTHOOK: query: -- create /fact_daily/ds=1/hr=2 directory
+INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='2')
+SELECT key, value FROM src WHERE key=369 or key=406
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@fact_daily@ds=1/hr=2
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- create /fact_daily/ds=1/hr=3 directory
+INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='3')
+SELECT key, value FROM src WHERE key=238
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@fact_daily@ds=1/hr=3
+POSTHOOK: query: -- create /fact_daily/ds=1/hr=3 directory
+INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='3')
+SELECT key, value FROM src WHERE key=238
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@fact_daily@ds=1/hr=3
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+#### A masked pattern was here ####
+PREHOOK: query: -- switch fact_daily to skewed table and point its location to /fact_daily/ds=1
+alter table fact_daily skewed by (x,y) on ((484,'val_484'),(238,'val_238'))
+PREHOOK: type: ALTERTABLE_SKEWED
+PREHOOK: Input: default@fact_daily
+PREHOOK: Output: default@fact_daily
+POSTHOOK: query: -- switch fact_daily to skewed table and point its location to /fact_daily/ds=1
+alter table fact_daily skewed by (x,y) on ((484,'val_484'),(238,'val_238'))
+POSTHOOK: type: ALTERTABLE_SKEWED
+POSTHOOK: Input: default@fact_daily
+POSTHOOK: Output: default@fact_daily
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1', hr='4')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@fact_daily
+POSTHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1', hr='4')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@fact_daily
+POSTHOOK: Output: default@fact_daily@ds=1/hr=4
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- set List Bucketing location map
+#### A masked pattern was here ####
+PREHOOK: type: ALTERTBLPART_SKEWED_LOCATION
+PREHOOK: Input: default@fact_daily
+PREHOOK: Output: default@fact_daily@ds=1/hr=4
+POSTHOOK: query: -- set List Bucketing location map
+#### A masked pattern was here ####
+POSTHOOK: type: ALTERTBLPART_SKEWED_LOCATION
+POSTHOOK: Input: default@fact_daily
+POSTHOOK: Input: default@fact_daily@ds=1/hr=4
+POSTHOOK: Output: default@fact_daily@ds=1/hr=4
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='4')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='4')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+x int None
+y string None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [1, 4]
+Database: default
+Table: fact_daily
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Skewed Columns: [x, y]
+Skewed Values: [[484, val_484], [238, val_238]]
+#### A masked pattern was here ####
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='4'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='4'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+369 val_369 1 4
+406 val_406 1 4
+369 val_369 1 4
+369 val_369 1 4
+406 val_406 1 4
+406 val_406 1 4
+406 val_406 1 4
+238 val_238 1 4
+238 val_238 1 4
+484 val_484 1 4
+PREHOOK: query: -- pruner only pick up skewed-value directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x FROM fact_daily WHERE ( ds='1' and hr='4') and (x=484 and y= 'val_484')
+PREHOOK: type: QUERY
+POSTHOOK: query: -- pruner only pick up skewed-value directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x FROM fact_daily WHERE ( ds='1' and hr='4') and (x=484 and y= 'val_484')
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '4')) (and (= (TOK_TABLE_OR_COL x) 484) (= (TOK_TABLE_OR_COL y) 'val_484'))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ fact_daily
+ TableScan
+ alias: fact_daily
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: ((x = 484) and (y = 'val_484'))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: x
+ type: int
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: y=val_484
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 4
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily
+ name: default.fact_daily
+ Truncated Path -> Alias:
+ /fact_daily/ds=1/hr=4/x=484/y=val_484 [fact_daily]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- List Bucketing Query
+SELECT x FROM fact_daily WHERE ( ds='1' and hr='4') and (x=484 and y= 'val_484')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: query: -- List Bucketing Query
+SELECT x FROM fact_daily WHERE ( ds='1' and hr='4') and (x=484 and y= 'val_484')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+484
+PREHOOK: query: -- pruner only pick up skewed-value directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x,y FROM fact_daily WHERE ( ds='1' and hr='4') and (x=238 and y= 'val_238')
+PREHOOK: type: QUERY
+POSTHOOK: query: -- pruner only pick up skewed-value directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x,y FROM fact_daily WHERE ( ds='1' and hr='4') and (x=238 and y= 'val_238')
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR (TOK_TABLE_OR_COL y))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '4')) (and (= (TOK_TABLE_OR_COL x) 238) (= (TOK_TABLE_OR_COL y) 'val_238'))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ fact_daily
+ TableScan
+ alias: fact_daily
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: ((x = 238) and (y = 'val_238'))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: x
+ type: int
+ expr: y
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: y=val_238
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 4
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily
+ name: default.fact_daily
+ Truncated Path -> Alias:
+ /fact_daily/ds=1/hr=4/x=238/y=val_238 [fact_daily]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- List Bucketing Query
+SELECT x,y FROM fact_daily WHERE ( ds='1' and hr='4') and (x=238 and y= 'val_238')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: query: -- List Bucketing Query
+SELECT x,y FROM fact_daily WHERE ( ds='1' and hr='4') and (x=238 and y= 'val_238')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+238 val_238
+238 val_238
+PREHOOK: query: -- pruner only pick up default directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x FROM fact_daily WHERE ( ds='1' and hr='4') and (y = "3")
+PREHOOK: type: QUERY
+POSTHOOK: query: -- pruner only pick up default directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x FROM fact_daily WHERE ( ds='1' and hr='4') and (y = "3")
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '4')) (= (TOK_TABLE_OR_COL y) "3")))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ fact_daily
+ TableScan
+ alias: fact_daily
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: (y = '3')
+ type: boolean
+ Select Operator
+ expressions:
+ expr: x
+ type: int
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 4
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily
+ name: default.fact_daily
+ Truncated Path -> Alias:
+ /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- List Bucketing Query
+SELECT x FROM fact_daily WHERE ( ds='1' and hr='4') and (y = "3")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: query: -- List Bucketing Query
+SELECT x FROM fact_daily WHERE ( ds='1' and hr='4') and (y = "3")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- pruner only pick up default directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x,y FROM fact_daily WHERE ( ds='1' and hr='4') and x = 495
+PREHOOK: type: QUERY
+POSTHOOK: query: -- pruner only pick up default directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x,y FROM fact_daily WHERE ( ds='1' and hr='4') and x = 495
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR (TOK_TABLE_OR_COL y))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '4')) (= (TOK_TABLE_OR_COL x) 495)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ fact_daily
+ TableScan
+ alias: fact_daily
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: (x = 495)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: x
+ type: int
+ expr: y
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 4
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily
+ name: default.fact_daily
+ Truncated Path -> Alias:
+ /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- List Bucketing Query
+SELECT x,y FROM fact_daily WHERE ( ds='1' and hr='4') and x = 369
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: query: -- List Bucketing Query
+SELECT x,y FROM fact_daily WHERE ( ds='1' and hr='4') and x = 369
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+369 val_369
+369 val_369
+369 val_369
Added: hive/trunk/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out?rev=1404924&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out Fri Nov 2 11:20:26 2012
@@ -0,0 +1,729 @@
+PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- List bucketing query logic test case. We simulate the directory structure by DML here.
+-- Test condition:
+-- 1. where clause has multiple skewed columns and non-skewed columns
+-- 3. where clause has a few operators
+-- Test focus:
+-- 1. basic list bucketing query work
+-- Test result:
+-- 1. pruner only pick up right directory
+-- 2. query result is right
+
+
+-- create 1 table: fact_daily
+-- 1. create a few partitions
+-- 2. dfs move partition according to list bucketing structure (simulate DML)
+-- $/fact_daily/ds=1/hr=4/x=../y=..
+-- notes: waste all partitions except ds=1 and hr=4 for list bucketing query test
+-- 3. alter it to skewed table and set up location map
+-- 4. list bucketing query
+-- fact_daily (ds=1 and hr=4) will be used for list bucketing query
+CREATE TABLE fact_daily(x int, y STRING) PARTITIONED BY (ds STRING, hr STRING)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- List bucketing query logic test case. We simulate the directory structure by DML here.
+-- Test condition:
+-- 1. where clause has multiple skewed columns and non-skewed columns
+-- 3. where clause has a few operators
+-- Test focus:
+-- 1. basic list bucketing query work
+-- Test result:
+-- 1. pruner only pick up right directory
+-- 2. query result is right
+
+
+-- create 1 table: fact_daily
+-- 1. create a few partitions
+-- 2. dfs move partition according to list bucketing structure (simulate DML)
+-- $/fact_daily/ds=1/hr=4/x=../y=..
+-- notes: waste all partitions except ds=1 and hr=4 for list bucketing query test
+-- 3. alter it to skewed table and set up location map
+-- 4. list bucketing query
+-- fact_daily (ds=1 and hr=4) will be used for list bucketing query
+CREATE TABLE fact_daily(x int, y STRING) PARTITIONED BY (ds STRING, hr STRING)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@fact_daily
+PREHOOK: query: -- create /fact_daily/ds=1/hr=1 directory
+INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='1')
+SELECT key, value FROM src WHERE key=484
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@fact_daily@ds=1/hr=1
+POSTHOOK: query: -- create /fact_daily/ds=1/hr=1 directory
+INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='1')
+SELECT key, value FROM src WHERE key=484
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@fact_daily@ds=1/hr=1
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- create /fact_daily/ds=1/hr=2 directory
+INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='2')
+SELECT key, value FROM src WHERE key=369 or key=406
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@fact_daily@ds=1/hr=2
+POSTHOOK: query: -- create /fact_daily/ds=1/hr=2 directory
+INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='2')
+SELECT key, value FROM src WHERE key=369 or key=406
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@fact_daily@ds=1/hr=2
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- create /fact_daily/ds=1/hr=3 directory
+INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='3')
+SELECT key, value FROM src WHERE key=238
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@fact_daily@ds=1/hr=3
+POSTHOOK: query: -- create /fact_daily/ds=1/hr=3 directory
+INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='3')
+SELECT key, value FROM src WHERE key=238
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@fact_daily@ds=1/hr=3
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+#### A masked pattern was here ####
+PREHOOK: query: -- switch fact_daily to skewed table and point its location to /fact_daily/ds=1
+alter table fact_daily skewed by (x,y) on ((484,'val_484'),(238,'val_238'))
+PREHOOK: type: ALTERTABLE_SKEWED
+PREHOOK: Input: default@fact_daily
+PREHOOK: Output: default@fact_daily
+POSTHOOK: query: -- switch fact_daily to skewed table and point its location to /fact_daily/ds=1
+alter table fact_daily skewed by (x,y) on ((484,'val_484'),(238,'val_238'))
+POSTHOOK: type: ALTERTABLE_SKEWED
+POSTHOOK: Input: default@fact_daily
+POSTHOOK: Output: default@fact_daily
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1', hr='4')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@fact_daily
+POSTHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1', hr='4')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@fact_daily
+POSTHOOK: Output: default@fact_daily@ds=1/hr=4
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- set List Bucketing location map
+#### A masked pattern was here ####
+PREHOOK: type: ALTERTBLPART_SKEWED_LOCATION
+PREHOOK: Input: default@fact_daily
+PREHOOK: Output: default@fact_daily@ds=1/hr=4
+POSTHOOK: query: -- set List Bucketing location map
+#### A masked pattern was here ####
+POSTHOOK: type: ALTERTBLPART_SKEWED_LOCATION
+POSTHOOK: Input: default@fact_daily
+POSTHOOK: Input: default@fact_daily@ds=1/hr=4
+POSTHOOK: Output: default@fact_daily@ds=1/hr=4
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='4')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='4')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+x int None
+y string None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [1, 4]
+Database: default
+Table: fact_daily
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Skewed Columns: [x, y]
+Skewed Values: [[484, val_484], [238, val_238]]
+#### A masked pattern was here ####
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='4'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='4'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+369 val_369 1 4
+406 val_406 1 4
+369 val_369 1 4
+369 val_369 1 4
+406 val_406 1 4
+406 val_406 1 4
+406 val_406 1 4
+238 val_238 1 4
+238 val_238 1 4
+484 val_484 1 4
+PREHOOK: query: -- pruner only pick up default directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and y= 'val_484'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- pruner only pick up default directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and y= 'val_484'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR (TOK_TABLE_OR_COL y))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '4')) (= (TOK_TABLE_OR_COL y) 'val_484')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ fact_daily
+ TableScan
+ alias: fact_daily
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: (y = 'val_484')
+ type: boolean
+ Select Operator
+ expressions:
+ expr: x
+ type: int
+ expr: y
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 4
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily
+ name: default.fact_daily
+#### A masked pattern was here ####
+ Partition
+ base file name: y=val_484
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 4
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily
+ name: default.fact_daily
+ Truncated Path -> Alias:
+ /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily]
+ /fact_daily/ds=1/hr=4/x=484/y=val_484 [fact_daily]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- List Bucketing Query
+SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and y= 'val_484'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: query: -- List Bucketing Query
+SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and y= 'val_484'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+484 val_484
+PREHOOK: query: -- pruner only pick up default directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x FROM fact_daily WHERE ds='1' and hr='4' and x= 406
+PREHOOK: type: QUERY
+POSTHOOK: query: -- pruner only pick up default directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x FROM fact_daily WHERE ds='1' and hr='4' and x= 406
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '4')) (= (TOK_TABLE_OR_COL x) 406)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ fact_daily
+ TableScan
+ alias: fact_daily
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: (x = 406)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: x
+ type: int
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 4
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily
+ name: default.fact_daily
+ Truncated Path -> Alias:
+ /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- List Bucketing Query
+SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and x= 406
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: query: -- List Bucketing Query
+SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and x= 406
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+406 val_406
+406 val_406
+406 val_406
+406 val_406
+PREHOOK: query: -- pruner only pick up skewed-value directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and ( (x=484 and y ='val_484') or (x=238 and y= 'val_238'))
+PREHOOK: type: QUERY
+POSTHOOK: query: -- pruner only pick up skewed-value directory
+-- explain plan shows which directory selected: Truncated Path -> Alias
+explain extended SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and ( (x=484 and y ='val_484') or (x=238 and y= 'val_238'))
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR (TOK_TABLE_OR_COL y))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '4')) (or (and (= (TOK_TABLE_OR_COL x) 484) (= (TOK_TABLE_OR_COL y) 'val_484')) (and (= (TOK_TABLE_OR_COL x) 238) (= (TOK_TABLE_OR_COL y) 'val_238')))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ fact_daily
+ TableScan
+ alias: fact_daily
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: (((x = 484) and (y = 'val_484')) or ((x = 238) and (y = 'val_238')))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: x
+ type: int
+ expr: y
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: y=val_238
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 4
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily
+ name: default.fact_daily
+#### A masked pattern was here ####
+ Partition
+ base file name: y=val_484
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 4
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns x,y
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 3
+ numPartitions 3
+ numRows 10
+ partition_columns ds/hr
+ rawDataSize 110
+ serialization.ddl struct fact_daily { i32 x, string y}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 120
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily
+ name: default.fact_daily
+ Truncated Path -> Alias:
+ /fact_daily/ds=1/hr=4/x=238/y=val_238 [fact_daily]
+ /fact_daily/ds=1/hr=4/x=484/y=val_484 [fact_daily]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- List Bucketing Query
+SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and ( (x=484 and y ='val_484') or (x=238 and y= 'val_238'))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: query: -- List Bucketing Query
+SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and ( (x=484 and y ='val_484') or (x=238 and y= 'val_238'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fact_daily@ds=1/hr=4
+#### A masked pattern was here ####
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+238 val_238
+238 val_238
+484 val_484
+PREHOOK: query: -- clean up
+drop table fact_daily
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@fact_daily
+PREHOOK: Output: default@fact_daily
+POSTHOOK: query: -- clean up
+drop table fact_daily
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@fact_daily
+POSTHOOK: Output: default@fact_daily
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]