You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/12/05 12:59:26 UTC
svn commit: r1417374 [10/11] - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/common/
common/src/java/org/apache/hadoop/hive/conf/ conf/
ql/src/java/org/apache/hadoop/hive/ql/
ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/h...
Modified: hive/trunk/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out?rev=1417374&r1=1417373&r2=1417374&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out Wed Dec 5 11:59:15 2012
@@ -12,11 +12,9 @@ PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_
-- 1. pruner only pick up right directory
-- 2. query result is right
--- create 2 tables: fact_daily and fact_daily
--- fact_daily will be used for list bucketing query
--- fact_daily is a table used to prepare data and test directories
-CREATE TABLE fact_daily(x int, y STRING, z STRING) PARTITIONED BY (ds STRING, hr STRING)
-#### A masked pattern was here ####
+-- create a skewed table
+create table fact_daily (key String, value String)
+partitioned by (ds String, hr String)
PREHOOK: type: CREATETABLE
POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
@@ -32,239 +30,104 @@ POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR
-- 1. pruner only pick up right directory
-- 2. query result is right
--- create 2 tables: fact_daily and fact_daily
--- fact_daily will be used for list bucketing query
--- fact_daily is a table used to prepare data and test directories
-CREATE TABLE fact_daily(x int, y STRING, z STRING) PARTITIONED BY (ds STRING, hr STRING)
-#### A masked pattern was here ####
+-- create a skewed table
+create table fact_daily (key String, value String)
+partitioned by (ds String, hr String)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@fact_daily
-PREHOOK: query: -- create /fact_daily/ds=1/hr=1 directory
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='1')
-SELECT key, value, value FROM src WHERE key=484
+PREHOOK: query: -- partition no skew
+insert overwrite table fact_daily partition (ds = '1', hr = '1')
+select key, value from src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@fact_daily@ds=1/hr=1
-POSTHOOK: query: -- create /fact_daily/ds=1/hr=1 directory
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='1')
-SELECT key, value, value FROM src WHERE key=484
+POSTHOOK: query: -- partition no skew
+insert overwrite table fact_daily partition (ds = '1', hr = '1')
+select key, value from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@fact_daily@ds=1/hr=1
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: -- create /fact_daily/ds=1/hr=2 directory
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='2')
-SELECT key+11, value, value FROM src WHERE key=484
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@fact_daily@ds=1/hr=2
-POSTHOOK: query: -- create /fact_daily/ds=1/hr=2 directory
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='2')
-SELECT key+11, value, value FROM src WHERE key=484
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@fact_daily@ds=1/hr=2
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: -- create /fact_daily/ds=1/hr=3 directory
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='3')
-SELECT key, value, value FROM src WHERE key=238
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@fact_daily@ds=1/hr=3
-POSTHOOK: query: -- create /fact_daily/ds=1/hr=3 directory
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='3')
-SELECT key, value, value FROM src WHERE key=238
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@fact_daily@ds=1/hr=3
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: -- create /fact_daily/ds=1/hr=4 directory
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='4')
-SELECT key, value, value FROM src WHERE key=98
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@fact_daily@ds=1/hr=4
-POSTHOOK: query: -- create /fact_daily/ds=1/hr=4 directory
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='4')
-SELECT key, value, value FROM src WHERE key=98
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@fact_daily@ds=1/hr=4
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [1, 1]
+Database: default
+Table: fact_daily
#### A masked pattern was here ####
-PREHOOK: query: -- create a non-skewed partition ds=200 and hr =1 in fact_daily table
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='200', hr='1') SELECT key, value, value FROM src WHERE key=145 or key=406 or key=429
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@fact_daily@ds=200/hr=1
-POSTHOOK: query: -- create a non-skewed partition ds=200 and hr =1 in fact_daily table
-INSERT OVERWRITE TABLE fact_daily PARTITION (ds='200', hr='1') SELECT key, value, value FROM src WHERE key=145 or key=406 or key=429
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@fact_daily@ds=200/hr=1
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: -- switch fact_daily to skewed table, create partition ds=1 and hr=5 and point its location to /fact_daily/ds=1
-alter table fact_daily skewed by (x,y) on ((484,'val_484'),(238,'val_238'))
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- partition. skewed value is 484/238
+alter table fact_daily skewed by (key, value) on (('484','val_484'),('238','val_238')) stored as DIRECTORIES
PREHOOK: type: ALTERTABLE_SKEWED
PREHOOK: Input: default@fact_daily
PREHOOK: Output: default@fact_daily
-POSTHOOK: query: -- switch fact_daily to skewed table, create partition ds=1 and hr=5 and point its location to /fact_daily/ds=1
-alter table fact_daily skewed by (x,y) on ((484,'val_484'),(238,'val_238'))
+POSTHOOK: query: -- partition. skewed value is 484/238
+alter table fact_daily skewed by (key, value) on (('484','val_484'),('238','val_238')) stored as DIRECTORIES
POSTHOOK: type: ALTERTABLE_SKEWED
POSTHOOK: Input: default@fact_daily
POSTHOOK: Output: default@fact_daily
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: ALTER TABLE fact_daily SET TBLPROPERTIES('EXTERNAL'='TRUE')
-PREHOOK: type: ALTERTABLE_PROPERTIES
-PREHOOK: Input: default@fact_daily
-PREHOOK: Output: default@fact_daily
-POSTHOOK: query: ALTER TABLE fact_daily SET TBLPROPERTIES('EXTERNAL'='TRUE')
-POSTHOOK: type: ALTERTABLE_PROPERTIES
-POSTHOOK: Input: default@fact_daily
-POSTHOOK: Output: default@fact_daily
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1', hr='5')
-#### A masked pattern was here ####
-PREHOOK: type: ALTERTABLE_ADDPARTS
-PREHOOK: Input: default@fact_daily
-POSTHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1', hr='5')
-#### A masked pattern was here ####
-POSTHOOK: type: ALTERTABLE_ADDPARTS
-POSTHOOK: Input: default@fact_daily
-POSTHOOK: Output: default@fact_daily@ds=1/hr=5
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: -- set List Bucketing location map
-#### A masked pattern was here ####
-PREHOOK: type: ALTERTBLPART_SKEWED_LOCATION
-PREHOOK: Input: default@fact_daily
-PREHOOK: Output: default@fact_daily@ds=1/hr=5
-POSTHOOK: query: -- set List Bucketing location map
-#### A masked pattern was here ####
-POSTHOOK: type: ALTERTBLPART_SKEWED_LOCATION
-POSTHOOK: Input: default@fact_daily
-POSTHOOK: Input: default@fact_daily@ds=1/hr=5
-POSTHOOK: Output: default@fact_daily@ds=1/hr=5
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='5')
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table fact_daily partition (ds = '1', hr = '2')
+select key, value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@fact_daily@ds=1/hr=2
+POSTHOOK: query: insert overwrite table fact_daily partition (ds = '1', hr = '2')
+select key, value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@fact_daily@ds=1/hr=2
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='2')
PREHOOK: type: DESCTABLE
-POSTHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='5')
+POSTHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='2')
POSTHOOK: type: DESCTABLE
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
# col_name data_type comment
-x int None
-y string None
-z string None
+key string None
+value string None
# Partition Information
# col_name data_type comment
@@ -273,13 +136,17 @@ ds string
hr string None
# Detailed Partition Information
-Partition Value: [1, 5]
+Partition Value: [1, 2]
Database: default
Table: fact_daily
#### A masked pattern was here ####
Protect Mode: None
#### A masked pattern was here ####
Partition Parameters:
+ numFiles 3
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
#### A masked pattern was here ####
# Storage Information
@@ -290,108 +157,57 @@ Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
-Skewed Columns: [x, y]
+Stored As SubDirectories: Yes
+Skewed Columns: [key, value]
Skewed Values: [[484, val_484], [238, val_238]]
#### A masked pattern was here ####
+Skewed Value to Truncated Path: {[484, val_484]=/fact_daily/ds=1/hr=2/key=484/value=val_484, [238, val_238]=/fact_daily/ds=1/hr=2/key=238/value=val_238}
Storage Desc Params:
serialization.format 1
-PREHOOK: query: -- alter skewed information and create partition ds=100 and hr=1
-alter table fact_daily skewed by (x,y) on ((495,'val_484'))
+PREHOOK: query: -- another partition. skewed value is 327
+alter table fact_daily skewed by (key, value) on (('327','val_327')) stored as DIRECTORIES
PREHOOK: type: ALTERTABLE_SKEWED
PREHOOK: Input: default@fact_daily
PREHOOK: Output: default@fact_daily
-POSTHOOK: query: -- alter skewed information and create partition ds=100 and hr=1
-alter table fact_daily skewed by (x,y) on ((495,'val_484'))
+POSTHOOK: query: -- another partition. skewed value is 327
+alter table fact_daily skewed by (key, value) on (('327','val_327')) stored as DIRECTORIES
POSTHOOK: type: ALTERTABLE_SKEWED
POSTHOOK: Input: default@fact_daily
POSTHOOK: Output: default@fact_daily
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='100', hr='1')
-#### A masked pattern was here ####
-PREHOOK: type: ALTERTABLE_ADDPARTS
-PREHOOK: Input: default@fact_daily
-POSTHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='100', hr='1')
-#### A masked pattern was here ####
-POSTHOOK: type: ALTERTABLE_ADDPARTS
-POSTHOOK: Input: default@fact_daily
-POSTHOOK: Output: default@fact_daily@ds=100/hr=1
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-#### A masked pattern was here ####
-PREHOOK: type: ALTERTBLPART_SKEWED_LOCATION
-PREHOOK: Input: default@fact_daily
-PREHOOK: Output: default@fact_daily@ds=100/hr=1
-#### A masked pattern was here ####
-POSTHOOK: type: ALTERTBLPART_SKEWED_LOCATION
-POSTHOOK: Input: default@fact_daily
-POSTHOOK: Input: default@fact_daily@ds=100/hr=1
-POSTHOOK: Output: default@fact_daily@ds=100/hr=1
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: describe formatted fact_daily PARTITION (ds = '100', hr='1')
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table fact_daily partition (ds = '1', hr = '3')
+select key, value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@fact_daily@ds=1/hr=3
+POSTHOOK: query: insert overwrite table fact_daily partition (ds = '1', hr = '3')
+select key, value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@fact_daily@ds=1/hr=3
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='3')
PREHOOK: type: DESCTABLE
-POSTHOOK: query: describe formatted fact_daily PARTITION (ds = '100', hr='1')
+POSTHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='3')
POSTHOOK: type: DESCTABLE
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
# col_name data_type comment
-x int None
-y string None
-z string None
+key string None
+value string None
# Partition Information
# col_name data_type comment
@@ -400,13 +216,17 @@ ds string
hr string None
# Detailed Partition Information
-Partition Value: [100, 1]
+Partition Value: [1, 3]
Database: default
Table: fact_daily
#### A masked pattern was here ####
Protect Mode: None
#### A masked pattern was here ####
Partition Parameters:
+ numFiles 2
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
#### A masked pattern was here ####
# Storage Information
@@ -417,36 +237,29 @@ Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
-Skewed Columns: [x, y]
-Skewed Values: [[495, val_484]]
+Stored As SubDirectories: Yes
+Skewed Columns: [key, value]
+Skewed Values: [[327, val_327]]
#### A masked pattern was here ####
+Skewed Value to Truncated Path: {[327, val_327]=/fact_daily/ds=1/hr=3/key=327/value=val_327}
Storage Desc Params:
serialization.format 1
PREHOOK: query: -- query non-skewed partition
explain extended
-select * from fact_daily where ds='200' and hr='1' and x=145
+select * from fact_daily where ds = '1' and hr='1' and key='145'
PREHOOK: type: QUERY
POSTHOOK: query: -- query non-skewed partition
explain extended
-select * from fact_daily where ds='200' and hr='1' and x=145
+select * from fact_daily where ds = '1' and hr='1' and key='145'
POSTHOOK: type: QUERY
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '200') (= (TOK_TABLE_OR_COL hr) '1')) (= (TOK_TABLE_OR_COL x) 145)))))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '1')) (= (TOK_TABLE_OR_COL key) '145')))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -463,21 +276,19 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate:
- expr: (x = 145)
+ expr: (key = '145')
type: boolean
Select Operator
expressions:
- expr: x
- type: int
- expr: y
+ expr: key
type: string
- expr: z
+ expr: value
type: string
expr: ds
type: string
expr: hr
type: string
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3
File Output Operator
compressed: false
GlobalTableId: 0
@@ -488,8 +299,8 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1,_col2,_col3,_col4
- columns.types int:string:string:string:string
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
escape.delim \
serialization.format 1
TotalFiles: 1
@@ -505,187 +316,229 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 200
+ ds 1
hr 1
properties:
- EXTERNAL TRUE
bucket_count -1
- columns x,y,z
- columns.types int:string:string
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
name default.fact_daily
numFiles 1
- numPartitions 5
- numRows 7
+ numPartitions 3
+ numRows 500
partition_columns ds/hr
- rawDataSize 133
- serialization.ddl struct fact_daily { i32 x, string y, string z}
+ rawDataSize 5312
+ serialization.ddl struct fact_daily { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 140
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- EXTERNAL TRUE
bucket_count -1
- columns x,y,z
- columns.types int:string:string
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
name default.fact_daily
- numFiles 5
- numPartitions 5
- numRows 13
+ numFiles 6
+ numPartitions 3
+ numRows 1500
partition_columns ds/hr
- rawDataSize 241
- serialization.ddl struct fact_daily { i32 x, string y, string z}
+ rawDataSize 15936
+ serialization.ddl struct fact_daily { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 254
+ totalSize 17436
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily
name: default.fact_daily
Truncated Path -> Alias:
- /fact_daily/ds=200/hr=1 [fact_daily]
+ /fact_daily/ds=1/hr=1 [fact_daily]
Stage: Stage-0
Fetch Operator
limit: -1
-PREHOOK: query: select * from fact_daily where ds='200' and hr='1' and x=145
+PREHOOK: query: select * from fact_daily where ds = '1' and hr='1' and key='145'
PREHOOK: type: QUERY
-PREHOOK: Input: default@fact_daily@ds=200/hr=1
+PREHOOK: Input: default@fact_daily@ds=1/hr=1
#### A masked pattern was here ####
-POSTHOOK: query: select * from fact_daily where ds='200' and hr='1' and x=145
+POSTHOOK: query: select * from fact_daily where ds = '1' and hr='1' and key='145'
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@fact_daily@ds=200/hr=1
+POSTHOOK: Input: default@fact_daily@ds=1/hr=1
#### A masked pattern was here ####
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-145 val_145 val_145 200 1
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+145 val_145 1 1
PREHOOK: query: explain extended
-select * from fact_daily where ds='200' and hr='1'
+select count(*) from fact_daily where ds = '1' and hr='1'
PREHOOK: type: QUERY
POSTHOOK: query: explain extended
-select * from fact_daily where ds='200' and hr='1'
+select count(*) from fact_daily where ds = '1' and hr='1'
POSTHOOK: type: QUERY
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '200') (= (TOK_TABLE_OR_COL hr) '1')))))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '1')))))
STAGE DEPENDENCIES:
+ Stage-1 is a root stage
Stage-0 is a root stage
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ fact_daily
+ TableScan
+ alias: fact_daily
+ GatherStats: false
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 1
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 1
+ numPartitions 3
+ numRows 500
+ partition_columns ds/hr
+ rawDataSize 5312
+ serialization.ddl struct fact_daily { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.fact_daily
+ numFiles 6
+ numPartitions 3
+ numRows 1500
+ partition_columns ds/hr
+ rawDataSize 15936
+ serialization.ddl struct fact_daily { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 17436
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily
+ name: default.fact_daily
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Truncated Path -> Alias:
+ /fact_daily/ds=1/hr=1 [fact_daily]
+
Stage: Stage-0
Fetch Operator
limit: -1
- Processor Tree:
- TableScan
- alias: fact_daily
- GatherStats: false
- Select Operator
- expressions:
- expr: x
- type: int
- expr: y
- type: string
- expr: z
- type: string
- expr: ds
- type: string
- expr: hr
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- ListSink
-
-
-PREHOOK: query: select * from fact_daily where ds='200' and hr='1'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@fact_daily@ds=200/hr=1
-#### A masked pattern was here ####
-POSTHOOK: query: select * from fact_daily where ds='200' and hr='1'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@fact_daily@ds=200/hr=1
-#### A masked pattern was here ####
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-406 val_406 val_406 200 1
-429 val_429 val_429 200 1
-145 val_145 val_145 200 1
-406 val_406 val_406 200 1
-406 val_406 val_406 200 1
-429 val_429 val_429 200 1
-406 val_406 val_406 200 1
+
+
+PREHOOK: query: select count(*) from fact_daily where ds = '1' and hr='1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fact_daily@ds=1/hr=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from fact_daily where ds = '1' and hr='1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fact_daily@ds=1/hr=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+500
PREHOOK: query: -- query skewed partition
explain extended
-SELECT * FROM fact_daily WHERE ds='1' and hr='5' and (x=484 and y ='val_484')
+SELECT * FROM fact_daily WHERE ds='1' and hr='2' and (key='484' and value='val_484')
PREHOOK: type: QUERY
POSTHOOK: query: -- query skewed partition
explain extended
-SELECT * FROM fact_daily WHERE ds='1' and hr='5' and (x=484 and y ='val_484')
+SELECT * FROM fact_daily WHERE ds='1' and hr='2' and (key='484' and value='val_484')
POSTHOOK: type: QUERY
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '5')) (and (= (TOK_TABLE_OR_COL x) 484) (= (TOK_TABLE_OR_COL y) 'val_484'))))))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '2')) (and (= (TOK_TABLE_OR_COL key) '484') (= (TOK_TABLE_OR_COL value) 'val_484'))))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -702,21 +555,19 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate:
- expr: ((x = 484) and (y = 'val_484'))
+ expr: ((key = '484') and (value = 'val_484'))
type: boolean
Select Operator
expressions:
- expr: x
- type: int
- expr: y
+ expr: key
type: string
- expr: z
+ expr: value
type: string
expr: ds
type: string
expr: hr
type: string
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3
File Output Operator
compressed: false
GlobalTableId: 0
@@ -727,8 +578,8 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1,_col2,_col3,_col4
- columns.types int:string:string:string:string
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
escape.delim \
serialization.format 1
TotalFiles: 1
@@ -740,110 +591,90 @@ STAGE PLANS:
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: y=val_484
+ base file name: value=val_484
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 1
- hr 5
+ hr 2
properties:
- EXTERNAL TRUE
bucket_count -1
- columns x,y,z
- columns.types int:string:string
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
name default.fact_daily
- numFiles 5
- numPartitions 5
- numRows 13
+ numFiles 3
+ numPartitions 3
+ numRows 500
partition_columns ds/hr
- rawDataSize 241
- serialization.ddl struct fact_daily { i32 x, string y, string z}
+ rawDataSize 5312
+ serialization.ddl struct fact_daily { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 254
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- EXTERNAL TRUE
bucket_count -1
- columns x,y,z
- columns.types int:string:string
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
name default.fact_daily
- numFiles 5
- numPartitions 5
- numRows 13
+ numFiles 6
+ numPartitions 3
+ numRows 1500
partition_columns ds/hr
- rawDataSize 241
- serialization.ddl struct fact_daily { i32 x, string y, string z}
+ rawDataSize 15936
+ serialization.ddl struct fact_daily { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 254
+ totalSize 17436
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily
name: default.fact_daily
Truncated Path -> Alias:
- /fact_daily/ds=1/hr=5/x=484/y=val_484 [fact_daily]
+ /fact_daily/ds=1/hr=2/key=484/value=val_484 [fact_daily]
Stage: Stage-0
Fetch Operator
limit: -1
-PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='5' and (x=484 and y ='val_484')
+PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='2' and (key='484' and value='val_484')
PREHOOK: type: QUERY
-PREHOOK: Input: default@fact_daily@ds=1/hr=5
+PREHOOK: Input: default@fact_daily@ds=1/hr=2
#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='5' and (x=484 and y ='val_484')
+POSTHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='2' and (key='484' and value='val_484')
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@fact_daily@ds=1/hr=5
+POSTHOOK: Input: default@fact_daily@ds=1/hr=2
#### A masked pattern was here ####
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-484 val_484 val_484 1 5
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+484 val_484 1 2
PREHOOK: query: -- query another skewed partition
explain extended
-SELECT * FROM fact_daily WHERE ds='100' and hr='1' and (x=495 and y ='val_484')
+SELECT * FROM fact_daily WHERE ds='1' and hr='3' and (key='327' and value='val_327')
PREHOOK: type: QUERY
POSTHOOK: query: -- query another skewed partition
explain extended
-SELECT * FROM fact_daily WHERE ds='100' and hr='1' and (x=495 and y ='val_484')
+SELECT * FROM fact_daily WHERE ds='1' and hr='3' and (key='327' and value='val_327')
POSTHOOK: type: QUERY
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '100') (= (TOK_TABLE_OR_COL hr) '1')) (and (= (TOK_TABLE_OR_COL x) 495) (= (TOK_TABLE_OR_COL y) 'val_484'))))))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '3')) (and (= (TOK_TABLE_OR_COL key) '327') (= (TOK_TABLE_OR_COL value) 'val_327'))))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -860,21 +691,19 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate:
- expr: ((x = 495) and (y = 'val_484'))
+ expr: ((key = '327') and (value = 'val_327'))
type: boolean
Select Operator
expressions:
- expr: x
- type: int
- expr: y
+ expr: key
type: string
- expr: z
+ expr: value
type: string
expr: ds
type: string
expr: hr
type: string
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3
File Output Operator
compressed: false
GlobalTableId: 0
@@ -885,8 +714,8 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1,_col2,_col3,_col4
- columns.types int:string:string:string:string
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
escape.delim \
serialization.format 1
TotalFiles: 1
@@ -898,82 +727,73 @@ STAGE PLANS:
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: y=val_484
+ base file name: value=val_327
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 100
- hr 1
+ ds 1
+ hr 3
properties:
- EXTERNAL TRUE
bucket_count -1
- columns x,y,z
- columns.types int:string:string
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
name default.fact_daily
- numFiles 5
- numPartitions 5
- numRows 13
+ numFiles 2
+ numPartitions 3
+ numRows 500
partition_columns ds/hr
- rawDataSize 241
- serialization.ddl struct fact_daily { i32 x, string y, string z}
+ rawDataSize 5312
+ serialization.ddl struct fact_daily { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 254
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- EXTERNAL TRUE
bucket_count -1
- columns x,y,z
- columns.types int:string:string
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
name default.fact_daily
- numFiles 5
- numPartitions 5
- numRows 13
+ numFiles 6
+ numPartitions 3
+ numRows 1500
partition_columns ds/hr
- rawDataSize 241
- serialization.ddl struct fact_daily { i32 x, string y, string z}
+ rawDataSize 15936
+ serialization.ddl struct fact_daily { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 254
+ totalSize 17436
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily
name: default.fact_daily
Truncated Path -> Alias:
- /fact_daily/ds=1/hr=5/x=495/y=val_484 [fact_daily]
+ /fact_daily/ds=1/hr=3/key=327/value=val_327 [fact_daily]
Stage: Stage-0
Fetch Operator
limit: -1
-PREHOOK: query: SELECT * FROM fact_daily WHERE ds='100' and hr='1' and (x=495 and y ='val_484')
+PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='3' and (key='327' and value='val_327')
PREHOOK: type: QUERY
-PREHOOK: Input: default@fact_daily@ds=100/hr=1
+PREHOOK: Input: default@fact_daily@ds=1/hr=3
#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM fact_daily WHERE ds='100' and hr='1' and (x=495 and y ='val_484')
+POSTHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='3' and (key='327' and value='val_327')
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@fact_daily@ds=100/hr=1
+POSTHOOK: Input: default@fact_daily@ds=1/hr=3
#### A masked pattern was here ####
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: fact_daily PARTITION(ds=200,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-495 val_484 val_484 100 1
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
[... 6 lines stripped ...]