You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/05/24 04:11:39 UTC
[4/7] hive git commit: HIVE-13566: Auto-gather column stats - phase 1
(Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/ec4b936e/ql/src/test/results/clientpositive/autoColumnStats_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_3.q.out b/ql/src/test/results/clientpositive/autoColumnStats_3.q.out
new file mode 100644
index 0000000..ee41910
--- /dev/null
+++ b/ql/src/test/results/clientpositive/autoColumnStats_3.q.out
@@ -0,0 +1,420 @@
+PREHOOK: query: drop table src_multi1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table src_multi1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table src_multi1 like src
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_multi1
+POSTHOOK: query: create table src_multi1 like src
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_multi1
+PREHOOK: query: analyze table src_multi1 compute statistics for columns key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_multi1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table src_multi1 compute statistics for columns key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_multi1
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted src_multi1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_multi1
+POSTHOOK: query: describe formatted src_multi1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_multi1
+# col_name data_type comment
+
+key string default
+value string default
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"},\"BASIC_STATS\":\"true\"}
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ totalSize 0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: insert into table src_multi1 select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@src_multi1
+POSTHOOK: query: insert into table src_multi1 select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src_multi1
+POSTHOOK: Lineage: src_multi1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_multi1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted src_multi1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_multi1
+POSTHOOK: query: describe formatted src_multi1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_multi1
+# col_name data_type comment
+
+key string default
+value string default
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"}
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: drop table nzhang_part14
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table nzhang_part14
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table if not exists nzhang_part14 (key string, value string)
+ partitioned by (ds string, hr string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nzhang_part14
+POSTHOOK: query: create table if not exists nzhang_part14 (key string, value string)
+ partitioned by (ds string, hr string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nzhang_part14
+PREHOOK: query: describe formatted nzhang_part14
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@nzhang_part14
+POSTHOOK: query: describe formatted nzhang_part14
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@nzhang_part14
+# col_name data_type comment
+
+key string
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: insert into table nzhang_part14 partition(ds, hr)
+select key, value, ds, hr from (
+ select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a
+ union all
+ select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b
+ union all
+ select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c
+) T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@nzhang_part14
+POSTHOOK: query: insert into table nzhang_part14 partition(ds, hr)
+select key, value, ds, hr from (
+ select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a
+ union all
+ select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b
+ union all
+ select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c
+) T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@nzhang_part14@ds=1/hr=2
+POSTHOOK: Output: default@nzhang_part14@ds=1/hr=3
+POSTHOOK: Output: default@nzhang_part14@ds=2/hr=1
+POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).key EXPRESSION []
+POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).value EXPRESSION []
+POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).key EXPRESSION []
+POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).value EXPRESSION []
+POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).key EXPRESSION []
+POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).value EXPRESSION []
+PREHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@nzhang_part14
+POSTHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@nzhang_part14
+# col_name data_type comment
+
+key string
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [1, 3]
+Database: default
+Table: nzhang_part14
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ numFiles 1
+ numRows 2
+ rawDataSize 6
+ totalSize 8
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: analyze table nzhang_part14 partition(ds='1', hr='3') compute statistics for columns value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nzhang_part14
+PREHOOK: Input: default@nzhang_part14@ds=1/hr=3
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table nzhang_part14 partition(ds='1', hr='3') compute statistics for columns value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nzhang_part14
+POSTHOOK: Input: default@nzhang_part14@ds=1/hr=3
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@nzhang_part14
+POSTHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@nzhang_part14
+# col_name data_type comment
+
+key string
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [1, 3]
+Database: default
+Table: nzhang_part14
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"},\"BASIC_STATS\":\"true\"}
+ numFiles 1
+ numRows 2
+ rawDataSize 6
+ totalSize 8
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: desc formatted nzhang_part14 partition(ds='2', hr='1')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@nzhang_part14
+POSTHOOK: query: desc formatted nzhang_part14 partition(ds='2', hr='1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@nzhang_part14
+# col_name data_type comment
+
+key string
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2, 1]
+Database: default
+Table: nzhang_part14
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ numFiles 1
+ numRows 2
+ rawDataSize 8
+ totalSize 10
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: insert into table nzhang_part14 partition(ds, hr)
+select key, value, ds, hr from (
+ select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a
+ union all
+ select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b
+ union all
+ select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c
+) T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@nzhang_part14
+POSTHOOK: query: insert into table nzhang_part14 partition(ds, hr)
+select key, value, ds, hr from (
+ select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a
+ union all
+ select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b
+ union all
+ select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c
+) T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@nzhang_part14@ds=1/hr=2
+POSTHOOK: Output: default@nzhang_part14@ds=1/hr=3
+POSTHOOK: Output: default@nzhang_part14@ds=2/hr=1
+POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).key EXPRESSION []
+POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).value EXPRESSION []
+POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).key EXPRESSION []
+POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).value EXPRESSION []
+POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).key EXPRESSION []
+POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).value EXPRESSION []
+PREHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@nzhang_part14
+POSTHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@nzhang_part14
+# col_name data_type comment
+
+key string
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [1, 3]
+Database: default
+Table: nzhang_part14
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"}
+ numFiles 2
+ numRows 4
+ rawDataSize 12
+ totalSize 16
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: desc formatted nzhang_part14 partition(ds='2', hr='1')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@nzhang_part14
+POSTHOOK: query: desc formatted nzhang_part14 partition(ds='2', hr='1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@nzhang_part14
+# col_name data_type comment
+
+key string
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2, 1]
+Database: default
+Table: nzhang_part14
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"}
+ numFiles 2
+ numRows 4
+ rawDataSize 16
+ totalSize 20
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
http://git-wip-us.apache.org/repos/asf/hive/blob/ec4b936e/ql/src/test/results/clientpositive/autoColumnStats_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_4.q.out b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out
new file mode 100644
index 0000000..676a27a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out
@@ -0,0 +1,260 @@
+PREHOOK: query: create table acid_dtt(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_dtt
+POSTHOOK: query: create table acid_dtt(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_dtt
+PREHOOK: query: desc formatted acid_dtt
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@acid_dtt
+POSTHOOK: query: desc formatted acid_dtt
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@acid_dtt
+# col_name data_type comment
+
+a int
+b varchar(128)
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ totalSize 0
+ transactional true
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: 2
+Bucket Columns: [a]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: explain insert into table acid_dtt select cint, cast(cstring1 as varchar(128)) from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert into table acid_dtt select cint, cast(cstring1 as varchar(128)) from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-4 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cint is not null (type: boolean)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int), CAST( cstring1 AS varchar(128)) (type: varchar(128))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: varchar(128))
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(128))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: varchar(128))
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: varchar(128))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acid_dtt
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: varchar(128))
+ outputColumnNames: a, b
+ Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(a, 16), compute_stats(b, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acid_dtt
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: a, b
+ Column Types: int, varchar(128)
+ Table: default.acid_dtt
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+PREHOOK: query: insert into table acid_dtt select cint, cast(cstring1 as varchar(128)) from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_dtt
+POSTHOOK: query: insert into table acid_dtt select cint, cast(cstring1 as varchar(128)) from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_dtt
+POSTHOOK: Lineage: acid_dtt.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_dtt.b EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: desc formatted acid_dtt
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@acid_dtt
+POSTHOOK: query: desc formatted acid_dtt
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@acid_dtt
+# col_name data_type comment
+
+a int
+b varchar(128)
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"},\"BASIC_STATS\":\"true\"}
+ numFiles 2
+ numRows 10
+ rawDataSize 0
+ totalSize 1714
+ transactional true
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: 2
+Bucket Columns: [a]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: delete from acid_dtt where b = '0ruyd6Y50JpdGRf6HqD' or b = '2uLyD28144vklju213J1mr'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dtt
+PREHOOK: Output: default@acid_dtt
+POSTHOOK: query: delete from acid_dtt where b = '0ruyd6Y50JpdGRf6HqD' or b = '2uLyD28144vklju213J1mr'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dtt
+POSTHOOK: Output: default@acid_dtt
+PREHOOK: query: desc formatted acid_dtt
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@acid_dtt
+POSTHOOK: query: desc formatted acid_dtt
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@acid_dtt
+# col_name data_type comment
+
+a int
+b varchar(128)
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ numFiles 4
+ numRows 8
+ rawDataSize 0
+ totalSize 2719
+ transactional true
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: 2
+Bucket Columns: [a]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
http://git-wip-us.apache.org/repos/asf/hive/blob/ec4b936e/ql/src/test/results/clientpositive/autoColumnStats_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_5.q.out b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out
new file mode 100644
index 0000000..e905748
--- /dev/null
+++ b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out
@@ -0,0 +1,664 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned1
+PREHOOK: query: explain insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-2
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: values__tmp__table__1
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(tmp_values_col1) (type: int), tmp_values_col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partitioned1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), 1 (type: int)
+ outputColumnNames: a, b, part
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(a, 16), compute_stats(b, 16)
+ keys: 1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 1 (type: int)
+ sort order: +
+ Map-reduce partition columns: 1 (type: int)
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: 1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ part 1
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partitioned1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: a, b
+ Column Types: int, string
+ Table: default.partitioned1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partitioned1
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partitioned1
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@partitioned1@part=1
+POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@partitioned1@part=1
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: desc formatted partitioned1 partition(part=1)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partitioned1
+POSTHOOK: query: desc formatted partitioned1 partition(part=1)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partitioned1
+col_name data_type comment
+# col_name data_type comment
+
+a int
+b string
+
+# Partition Information
+# col_name data_type comment
+
+part int
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: partitioned1
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"},\"BASIC_STATS\":\"true\"}
+ numFiles 1
+ numRows 4
+ rawDataSize 40
+ totalSize 44
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: desc formatted partitioned1 partition(part=1) a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partitioned1
+POSTHOOK: query: desc formatted partitioned1 partition(part=1) a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partitioned1
+col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+a int 1 4 0 5 from deserializer
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@partitioned1
+PREHOOK: Output: default@partitioned1
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Output: default@partitioned1
+PREHOOK: query: desc formatted partitioned1 partition(part=1)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partitioned1
+POSTHOOK: query: desc formatted partitioned1 partition(part=1)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partitioned1
+col_name data_type comment
+# col_name data_type comment
+
+a int
+b string
+
+# Partition Information
+# col_name data_type comment
+
+part int
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: partitioned1
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"},\"BASIC_STATS\":\"true\"}
+ numFiles 1
+ numRows 4
+ rawDataSize 40
+ totalSize 44
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: explain insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-2
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: values__tmp__table__3
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(tmp_values_col1) (type: int), tmp_values_col2 (type: string), UDFToInteger(tmp_values_col3) (type: int), tmp_values_col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partitioned1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), 2 (type: int)
+ outputColumnNames: a, b, c, d, part
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(a, 16), compute_stats(b, 16), compute_stats(c, 16), compute_stats(d, 16)
+ keys: 2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 2 (type: int)
+ sort order: +
+ Map-reduce partition columns: 2 (type: int)
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ keys: 2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col4 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ part 2
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partitioned1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: a, b, c, d
+ Column Types: int, string, int, string
+ Table: default.partitioned1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partitioned1
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partitioned1
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@partitioned1@part=2
+POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@partitioned1@part=2
+POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: desc formatted partitioned1 partition(part=2)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partitioned1
+POSTHOOK: query: desc formatted partitioned1 partition(part=2)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partitioned1
+col_name data_type comment
+# col_name data_type comment
+
+a int
+b string
+c int
+d string
+
+# Partition Information
+# col_name data_type comment
+
+part int
+
+# Detailed Partition Information
+Partition Value: [2]
+Database: default
+Table: partitioned1
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\"},\"BASIC_STATS\":\"true\"}
+ numFiles 1
+ numRows 4
+ rawDataSize 56
+ totalSize 60
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: desc formatted partitioned1 partition(part=2) c
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partitioned1
+POSTHOOK: query: desc formatted partitioned1 partition(part=2) c
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partitioned1
+col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+c int 10 40 0 4 from deserializer
+PREHOOK: query: explain insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-2
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: values__tmp__table__5
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(tmp_values_col1) (type: int), tmp_values_col2 (type: string), UDFToInteger(tmp_values_col3) (type: int), tmp_values_col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partitioned1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), 1 (type: int)
+ outputColumnNames: a, b, c, d, part
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(a, 16), compute_stats(b, 16), compute_stats(c, 16), compute_stats(d, 16)
+ keys: 1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 1 (type: int)
+ sort order: +
+ Map-reduce partition columns: 1 (type: int)
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ keys: 1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col4 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ part 1
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partitioned1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: a, b, c, d
+ Column Types: int, string, int, string
+ Table: default.partitioned1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partitioned1
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partitioned1
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__6
+PREHOOK: Output: default@partitioned1@part=1
+POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__6
+POSTHOOK: Output: default@partitioned1@part=1
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: desc formatted partitioned1 partition(part=1)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partitioned1
+POSTHOOK: query: desc formatted partitioned1 partition(part=1)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partitioned1
+col_name data_type comment
+# col_name data_type comment
+
+a int
+b string
+
+# Partition Information
+# col_name data_type comment
+
+part int
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: partitioned1
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\"},\"BASIC_STATS\":\"true\"}
+ numFiles 2
+ numRows 6
+ rawDataSize 78
+ totalSize 84
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: desc formatted partitioned1 partition(part=1) a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partitioned1
+POSTHOOK: query: desc formatted partitioned1 partition(part=1) a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partitioned1
+col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+a int 1 6 0 5 from deserializer
+PREHOOK: query: desc formatted partitioned1 partition(part=1) c
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partitioned1
+POSTHOOK: query: desc formatted partitioned1 partition(part=1) c
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partitioned1
+col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+c int 100 200 0 3 from deserializer
http://git-wip-us.apache.org/repos/asf/hive/blob/ec4b936e/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
new file mode 100644
index 0000000..de75d8a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
@@ -0,0 +1,299 @@
+PREHOOK: query: DROP TABLE orcfile_merge2a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orcfile_merge2a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE orcfile_merge2a (key INT, value STRING)
+ PARTITIONED BY (one string, two string, three string)
+ STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcfile_merge2a
+POSTHOOK: query: CREATE TABLE orcfile_merge2a (key INT, value STRING)
+ PARTITIONED BY (one string, two string, three string)
+ STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcfile_merge2a
+PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three)
+ SELECT key, value, PMOD(HASH(key), 10) as two,
+ PMOD(HASH(value), 10) as three
+ FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three)
+ SELECT key, value, PMOD(HASH(key), 10) as two,
+ PMOD(HASH(value), 10) as three
+ FROM src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-2
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge2a
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string)
+ outputColumnNames: key, value, one, two, three
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ keys: '1' (type: string), two (type: string), three (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: '1' (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: '1' (type: string), _col1 (type: string), _col2 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: '1' (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col4 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), '1' (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ one 1
+ three
+ two
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge2a
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.orcfile_merge2a
+
+ Stage: Stage-3
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-5
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three)
+ SELECT key, value, PMOD(HASH(key), 10) as two,
+ PMOD(HASH(value), 10) as three
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge2a@one=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three)
+ SELECT key, value, PMOD(HASH(key), 10) as two,
+ PMOD(HASH(value), 10) as three
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=0/three=2
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=0/three=8
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=1/three=3
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=1/three=9
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=2/three=0
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=2/three=4
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=3/three=1
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=3/three=5
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=4/three=2
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=4/three=6
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=5/three=3
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=5/three=7
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=6/three=4
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=6/three=8
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=7/three=5
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=7/three=9
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=8/three=0
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=8/three=6
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=9/three=1
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=9/three=7
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=0,three=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=0,three=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=0,three=8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=0,three=8).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=1,three=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=1,three=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=1,three=9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=1,three=9).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=2,three=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=2,three=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=2,three=4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=2,three=4).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=3,three=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=3,three=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=3,three=5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=3,three=5).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=4,three=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=4,three=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=4,three=6).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=4,three=6).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=5,three=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=5,three=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=5,three=7).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=5,three=7).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=6,three=4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=6,three=4).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=6,three=8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=6,three=8).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=7,three=5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=7,three=5).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=7,three=9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=7,three=9).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=8,three=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=8,three=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=8,three=6).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=8,three=6).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=9,three=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=9,three=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=9,three=7).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=9,three=7).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge2a
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge2a
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=0/three=2
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=0/three=8
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=1/three=3
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=1/three=9
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=2/three=0
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=2/three=4
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=3/three=1
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=3/three=5
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=4/three=2
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=4/three=6
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=5/three=3
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=5/three=7
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=6/three=4
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=6/three=8
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=7/three=5
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=7/three=9
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=8/three=0
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=8/three=6
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=9/three=1
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=9/three=7
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge2a
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge2a
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=0/three=2
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=0/three=8
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=1/three=3
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=1/three=9
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=2/three=0
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=2/three=4
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=3/three=1
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=3/three=5
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=4/three=2
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=4/three=6
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=5/three=3
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=5/three=7
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=6/three=4
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=6/three=8
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=7/three=5
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=7/three=9
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=8/three=0
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=8/three=6
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=9/three=1
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=9/three=7
+#### A masked pattern was here ####
+-4209012844
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(key, value, '1', PMOD(HASH(key), 10),
+ PMOD(HASH(value), 10)) USING 'tr \t _' AS (c)
+ FROM src
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(key, value, '1', PMOD(HASH(key), 10),
+ PMOD(HASH(value), 10)) USING 'tr \t _' AS (c)
+ FROM src
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+-4209012844
+PREHOOK: query: DROP TABLE orcfile_merge2a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge2a
+PREHOOK: Output: default@orcfile_merge2a
+POSTHOOK: query: DROP TABLE orcfile_merge2a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge2a
+POSTHOOK: Output: default@orcfile_merge2a