You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2016/07/28 19:02:38 UTC
[1/2] hive git commit: HIVE-14310: ORC schema evolution should not
completely disable PPD (Prasanth Jayachandran reviewed by Owen O'Malley)
Repository: hive
Updated Branches:
refs/heads/master 602a5f38b -> 949eed2d0
http://git-wip-us.apache.org/repos/asf/hive/blob/949eed2d/ql/src/test/results/clientpositive/tez/orc_ppd_schema_evol_3a.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/orc_ppd_schema_evol_3a.q.out b/ql/src/test/results/clientpositive/tez/orc_ppd_schema_evol_3a.q.out
new file mode 100644
index 0000000..20d7085
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/orc_ppd_schema_evol_3a.q.out
@@ -0,0 +1,1132 @@
+PREHOOK: query: CREATE TABLE staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@staging
+POSTHOOK: query: CREATE TABLE staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@staging
+PREHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_ppd_staging
+POSTHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_ppd_staging
+PREHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_ppd_staging
+POSTHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_ppd_staging
+POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+PREHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values
+-- which makes it hard to test bloom filters
+insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_ppd_staging
+POSTHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values
+-- which makes it hard to test bloom filters
+insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_ppd_staging
+POSTHOOK: Lineage: orc_ppd_staging.b EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.i EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION []
+PREHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_ppd_staging
+POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_ppd_staging
+POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION []
+PREHOOK: query: CREATE TABLE orc_ppd(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_ppd
+POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_ppd
+PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd_staging
+PREHOOK: Output: default@orc_ppd
+POSTHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_ppd_staging
+POSTHOOK: Output: default@orc_ppd
+POSTHOOK: Lineage: orc_ppd.b SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.c EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.d SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.da EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.dec SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_ppd.f SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.i SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.s SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.si SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.t SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.v EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ]
+PREHOOK: query: -- Row group statistics for column t:
+-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0
+-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11
+-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19
+
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+0
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ RECORDS_OUT_0: 1
+0
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+8
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 17909
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 1000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+8
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+18
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 17909
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+18
+PREHOOK: query: alter table orc_ppd change column t t smallint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+0
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ RECORDS_OUT_0: 1
+0
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+8
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 17909
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 1000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+8
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+18
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 17909
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+18
+PREHOOK: query: alter table orc_ppd change column t t int
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+0
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ RECORDS_OUT_0: 1
+0
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+8
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 17909
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 1000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+8
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+18
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 17909
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+18
+PREHOOK: query: alter table orc_ppd change column t t bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+0
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ RECORDS_OUT_0: 1
+0
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+8
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 17909
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 1000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+8
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+18
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 17909
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+18
+PREHOOK: query: alter table orc_ppd change column t t string
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > '127'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 104
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+1566
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > '127'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 17909
+ HDFS_BYTES_WRITTEN: 104
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+1566
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = '55'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+8
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = '55'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 17909
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+8
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = '54'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 16936
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+18
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = '54'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 17909
+ HDFS_BYTES_WRITTEN: 102
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+18
+PREHOOK: query: -- float tests
+select count(*) from orc_ppd where f = 74.72
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 21496
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+2
+PREHOOK: query: select count(*) from orc_ppd where f = 74.72
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 23556
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+2
+PREHOOK: query: alter table orc_ppd change column f f double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select count(*) from orc_ppd where f = 74.72
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 21496
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+2
+PREHOOK: query: select count(*) from orc_ppd where f = 74.72
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 23556
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+2
+PREHOOK: query: alter table orc_ppd change column f f string
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select count(*) from orc_ppd where f = '74.72'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 21496
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+2
+PREHOOK: query: select count(*) from orc_ppd where f = '74.72'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 23556
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+2
+PREHOOK: query: -- string tests
+select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 20667
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+6
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 22574
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+6
+PREHOOK: query: alter table orc_ppd change column s s char(50)
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 20667
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+6
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 22574
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+6
+PREHOOK: query: alter table orc_ppd change column s s varchar(50)
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 20667
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+6
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 22574
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+6
+PREHOOK: query: alter table orc_ppd change column s s char(50)
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 20667
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+6
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 22574
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+6
+PREHOOK: query: alter table orc_ppd change column s s string
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 20667
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+6
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 22574
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+6
+PREHOOK: query: alter table orc_ppd add columns (boo boolean)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: -- ppd on newly added column
+select count(*) from orc_ppd where si = 442
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 18785
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+4
+PREHOOK: query: select count(*) from orc_ppd where si = 442 or boo is not null or boo = false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 18785
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+4
+PREHOOK: query: select count(*) from orc_ppd where si = 442
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 20256
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 1000
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+4
+PREHOOK: query: select count(*) from orc_ppd where si = 442 or boo is not null or boo = false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 20256
+ HDFS_BYTES_WRITTEN: 101
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 2100
+ RECORDS_OUT_0: 1
+ RECORDS_OUT_INTERMEDIATE_Map_1: 1
+4
[2/2] hive git commit: HIVE-14310: ORC schema evolution should not
completely disable PPD (Prasanth Jayachandran reviewed by Owen O'Malley)
Posted by pr...@apache.org.
HIVE-14310: ORC schema evolution should not completely disable PPD (Prasanth Jayachandran reviewed by Owen O'Malley)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/949eed2d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/949eed2d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/949eed2d
Branch: refs/heads/master
Commit: 949eed2d0c04bb034805f06f3d7534fa55862f34
Parents: 602a5f3
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Thu Jul 28 12:02:28 2016 -0700
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Thu Jul 28 12:02:28 2016 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../llap/io/encoded/OrcEncodedDataReader.java | 8 +-
.../orc/impl/ConvertTreeReaderFactory.java | 3 +-
.../org/apache/orc/impl/RecordReaderImpl.java | 32 +-
.../org/apache/orc/impl/SchemaEvolution.java | 149 ++-
.../apache/orc/impl/TestSchemaEvolution.java | 307 ++++-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 52 +-
.../clientpositive/orc_ppd_schema_evol_3a.q | 245 ++++
.../clientpositive/orc_ppd_schema_evol_3a.q.out | 544 +++++++++
.../tez/orc_ppd_schema_evol_3a.q.out | 1132 ++++++++++++++++++
10 files changed, 2396 insertions(+), 77 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/949eed2d/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f356663..e5f40e6 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -184,6 +184,7 @@ minitez.query.files.shared=acid_globallimit.q,\
orc_ppd_schema_evol_1b.q,\
orc_ppd_schema_evol_2a.q,\
orc_ppd_schema_evol_2b.q,\
+ orc_ppd_schema_evol_3a.q,\
orc_vectorization_ppd.q,\
parallel.q,\
ptf.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/949eed2d/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index 1dcd2cd..93c40e4 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -26,9 +26,11 @@ import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hive.llap.counters.LlapIOCounters;
-import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics;
+import org.apache.orc.OrcUtils;
+import org.apache.orc.TypeDescription;
import org.apache.orc.impl.DataReaderProperties;
import org.apache.orc.impl.OrcIndex;
+import org.apache.orc.impl.SchemaEvolution;
import org.apache.tez.common.counters.TezCounters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -704,8 +706,10 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
List<OrcProto.Type> types = fileMetadata.getTypes();
String[] colNamesForSarg = OrcInputFormat.getSargColumnNames(
columnNames, types, globalIncludes, fileMetadata.isOriginalFormat());
+ TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0);
+ SchemaEvolution schemaEvolution = new SchemaEvolution(schema, globalIncludes);
sargApp = new RecordReaderImpl.SargApplier(sarg, colNamesForSarg,
- rowIndexStride, types, globalIncludes.length);
+ rowIndexStride, globalIncludes.length, schemaEvolution);
}
boolean hasAnyData = false;
// readState should have been initialized by this time with an empty array.
http://git-wip-us.apache.org/repos/asf/hive/blob/949eed2d/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index 03378a9..c347181 100644
--- a/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -2788,8 +2788,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
}
}
- public static boolean canConvert(TypeDescription fileType, TypeDescription readerType)
- throws IOException {
+ public static boolean canConvert(TypeDescription fileType, TypeDescription readerType) {
Category readerTypeCategory = readerType.getCategory();
http://git-wip-us.apache.org/repos/asf/hive/blob/949eed2d/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java b/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java
index eb43ed6..92b6a8b 100644
--- a/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -44,8 +44,6 @@ import org.apache.orc.StripeInformation;
import org.apache.orc.TimestampColumnStatistics;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.io.DiskRange;
import org.apache.hadoop.hive.common.io.DiskRangeList;
@@ -164,21 +162,11 @@ public class RecordReaderImpl implements RecordReader {
this.bufferSize = fileReader.bufferSize;
this.rowIndexStride = fileReader.rowIndexStride;
SearchArgument sarg = options.getSearchArgument();
- // We want to use the sarg for predicate evaluation but we have data type conversion
- // (i.e Schema Evolution), so we currently ignore it.
- if (sarg != null && rowIndexStride != 0 && !evolution.hasConversion()) {
- sargApp = new SargApplier(
- sarg, options.getColumnNames(), rowIndexStride, types,
- included.length);
+ if (sarg != null && rowIndexStride != 0) {
+ sargApp = new SargApplier(sarg, options.getColumnNames(), rowIndexStride,
+ included.length, evolution);
} else {
sargApp = null;
- if (evolution.hasConversion()) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(
- "Skipping stripe elimination for {} since the schema has data type conversion",
- fileReader.path);
- }
- }
}
long rows = 0;
long skippedRows = 0;
@@ -720,9 +708,10 @@ public class RecordReaderImpl implements RecordReader {
private final long rowIndexStride;
// same as the above array, but indices are set to true
private final boolean[] sargColumns;
+ private SchemaEvolution evolution;
public SargApplier(SearchArgument sarg, String[] columnNames, long rowIndexStride,
- List<OrcProto.Type> types, int includedCount) {
+ int includedCount, final SchemaEvolution evolution) {
this.sarg = sarg;
sargLeaves = sarg.getLeaves();
filterColumns = mapSargColumnsToOrcInternalColIdx(sargLeaves, columnNames, 0);
@@ -735,6 +724,7 @@ public class RecordReaderImpl implements RecordReader {
sargColumns[i] = true;
}
}
+ this.evolution = evolution;
}
/**
@@ -764,10 +754,14 @@ public class RecordReaderImpl implements RecordReader {
}
OrcProto.ColumnStatistics stats = entry.getStatistics();
OrcProto.BloomFilter bf = null;
- if (bloomFilterIndices != null && bloomFilterIndices[filterColumns[pred]] != null) {
- bf = bloomFilterIndices[filterColumns[pred]].getBloomFilter(rowGroup);
+ if (bloomFilterIndices != null && bloomFilterIndices[columnIx] != null) {
+ bf = bloomFilterIndices[columnIx].getBloomFilter(rowGroup);
+ }
+ if (evolution != null && evolution.isPPDSafeConversion(columnIx)) {
+ leafValues[pred] = evaluatePredicateProto(stats, sargLeaves.get(pred), bf);
+ } else {
+ leafValues[pred] = TruthValue.YES_NO_NULL;
}
- leafValues[pred] = evaluatePredicateProto(stats, sargLeaves.get(pred), bf);
if (LOG.isTraceEnabled()) {
LOG.trace("Stats = " + stats);
LOG.trace("Setting " + sargLeaves.get(pred) + " to " + leafValues[pred]);
http://git-wip-us.apache.org/repos/asf/hive/blob/949eed2d/orc/src/java/org/apache/orc/impl/SchemaEvolution.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/SchemaEvolution.java b/orc/src/java/org/apache/orc/impl/SchemaEvolution.java
index ce3af7a..7379de9 100644
--- a/orc/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/orc/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -18,13 +18,10 @@
package org.apache.orc.impl;
-import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.orc.TypeDescription;
/**
@@ -32,35 +29,40 @@ import org.apache.orc.TypeDescription;
* has been schema evolution.
*/
public class SchemaEvolution {
+ // indexed by reader column id
private final TypeDescription[] readerFileTypes;
+ // indexed by reader column id
private final boolean[] included;
+ private final TypeDescription fileSchema;
private final TypeDescription readerSchema;
private boolean hasConversion;
- private static final Log LOG = LogFactory.getLog(SchemaEvolution.class);
+ // indexed by reader column id
+ private final boolean[] ppdSafeConversion;
- public SchemaEvolution(TypeDescription readerSchema, boolean[] included) {
- this.included = (included == null ? null : Arrays.copyOf(included, included.length));
- this.readerSchema = readerSchema;
-
- hasConversion = false;
-
- readerFileTypes = new TypeDescription[this.readerSchema.getMaximumId() + 1];
- buildSameSchemaFileTypesArray();
+ public SchemaEvolution(TypeDescription fileSchema, boolean[] includedCols) {
+ this(fileSchema, null, includedCols);
}
public SchemaEvolution(TypeDescription fileSchema,
TypeDescription readerSchema,
- boolean[] included) throws IOException {
- this.included = (included == null ? null : Arrays.copyOf(included, included.length));
- if (checkAcidSchema(fileSchema)) {
- this.readerSchema = createEventSchema(readerSchema);
+ boolean[] includeCols) {
+ this.included = includeCols == null ? null : Arrays.copyOf(includeCols, includeCols.length);
+ this.hasConversion = false;
+ this.fileSchema = fileSchema;
+ if (readerSchema != null) {
+ if (checkAcidSchema(fileSchema)) {
+ this.readerSchema = createEventSchema(readerSchema);
+ } else {
+ this.readerSchema = readerSchema;
+ }
+ this.readerFileTypes = new TypeDescription[this.readerSchema.getMaximumId() + 1];
+ buildConversionFileTypesArray(fileSchema, this.readerSchema);
} else {
- this.readerSchema = readerSchema;
+ this.readerSchema = fileSchema;
+ this.readerFileTypes = new TypeDescription[this.readerSchema.getMaximumId() + 1];
+ buildSameSchemaFileTypesArray();
}
-
- hasConversion = false;
- readerFileTypes = new TypeDescription[this.readerSchema.getMaximumId() + 1];
- buildConversionFileTypesArray(fileSchema, this.readerSchema);
+ this.ppdSafeConversion = populatePpdSafeConversion();
}
public TypeDescription getReaderSchema() {
@@ -81,15 +83,114 @@ public class SchemaEvolution {
/**
* Get the file type by reader type id.
- * @param readerType
+ * @param id reader column id
* @return
*/
public TypeDescription getFileType(int id) {
return readerFileTypes[id];
}
+ /**
+ * Check if column is safe for ppd evaluation
+ * @param colId reader column id
+ * @return true if the specified column is safe for ppd evaluation else false
+ */
+ public boolean isPPDSafeConversion(final int colId) {
+ if (hasConversion()) {
+ if (colId < 0 || colId >= ppdSafeConversion.length) {
+ return false;
+ }
+ return ppdSafeConversion[colId];
+ }
+
+ // when there is no schema evolution PPD is safe
+ return true;
+ }
+
+ private boolean[] populatePpdSafeConversion() {
+ if (fileSchema == null || readerSchema == null || readerFileTypes == null) {
+ return null;
+ }
+
+ boolean[] result = new boolean[readerSchema.getMaximumId() + 1];
+ boolean safePpd = validatePPDConversion(fileSchema, readerSchema);
+ result[readerSchema.getId()] = safePpd;
+ List<TypeDescription> children = readerSchema.getChildren();
+ if (children != null) {
+ for (TypeDescription child : children) {
+ TypeDescription fileType = getFileType(child.getId());
+ safePpd = validatePPDConversion(fileType, child);
+ result[child.getId()] = safePpd;
+ }
+ }
+ return result;
+ }
+
+ private boolean validatePPDConversion(final TypeDescription fileType,
+ final TypeDescription readerType) {
+ if (fileType == null) {
+ return false;
+ }
+ if (fileType.getCategory().isPrimitive()) {
+ if (fileType.getCategory().equals(readerType.getCategory())) {
+ // for decimals alone do equality check to not mess up with precision change
+ if (fileType.getCategory().equals(TypeDescription.Category.DECIMAL) &&
+ !fileType.equals(readerType)) {
+ return false;
+ }
+ return true;
+ }
+
+ // only integer and string evolutions are safe
+ // byte -> short -> int -> long
+ // string <-> char <-> varchar
+ // NOTE: Float to double evolution is not safe as floats are stored as doubles in ORC's
+ // internal index, but when doing predicate evaluation for queries like "select * from
+ // orc_float where f = 74.72" the constant on the filter is converted from string -> double
+ // so the precisions will be different and the comparison will fail.
+ // Soon, we should convert all sargs that compare equality between floats or
+ // doubles to range predicates.
+
+ // Similarly string -> char and varchar -> char and vice versa is not possible, as ORC stores
+ // char with padded spaces in its internal index.
+ switch (fileType.getCategory()) {
+ case BYTE:
+ if (readerType.getCategory().equals(TypeDescription.Category.SHORT) ||
+ readerType.getCategory().equals(TypeDescription.Category.INT) ||
+ readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+ return true;
+ }
+ break;
+ case SHORT:
+ if (readerType.getCategory().equals(TypeDescription.Category.INT) ||
+ readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+ return true;
+ }
+ break;
+ case INT:
+ if (readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+ return true;
+ }
+ break;
+ case STRING:
+ if (readerType.getCategory().equals(TypeDescription.Category.VARCHAR)) {
+ return true;
+ }
+ break;
+ case VARCHAR:
+ if (readerType.getCategory().equals(TypeDescription.Category.STRING)) {
+ return true;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ return false;
+ }
+
void buildConversionFileTypesArray(TypeDescription fileType,
- TypeDescription readerType) throws IOException {
+ TypeDescription readerType) {
// if the column isn't included, don't map it
if (included != null && !included[readerType.getId()]) {
return;
@@ -171,7 +272,7 @@ public class SchemaEvolution {
}
readerFileTypes[id] = fileType;
} else {
- throw new IOException(
+ throw new IllegalArgumentException(
String.format(
"ORC does not support type conversion from file type %s (%d) to reader type %s (%d)",
fileType.toString(), fileType.getId(),
http://git-wip-us.apache.org/repos/asf/hive/blob/949eed2d/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index a9c64fa..c28af94 100644
--- a/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -120,7 +120,8 @@ public class TestSchemaEvolution {
.withPrecision(20).withScale(10)))
.addField("f2", TypeDescription.createStruct()
.addField("f3", TypeDescription.createDate())
- .addField("f4", TypeDescription.createDouble()))
+ .addField("f4", TypeDescription.createDouble())
+ .addField("f5", TypeDescription.createByte()))
.addField("f6", TypeDescription.createChar().withMaxLength(100));
SchemaEvolution both2diff = new SchemaEvolution(fileStruct2, readerStruct2diff, null);
assertTrue(both2diff.hasConversion());
@@ -131,7 +132,8 @@ public class TestSchemaEvolution {
.withPrecision(20).withScale(10)))
.addField("f2", TypeDescription.createStruct()
.addField("f3", TypeDescription.createDate())
- .addField("f4", TypeDescription.createDouble()))
+ .addField("f4", TypeDescription.createDouble())
+ .addField("f5", TypeDescription.createBoolean()))
.addField("f6", TypeDescription.createChar().withMaxLength(80));
SchemaEvolution both2diffChar = new SchemaEvolution(fileStruct2, readerStruct2diffChar, null);
assertTrue(both2diffChar.hasConversion());
@@ -163,4 +165,305 @@ public class TestSchemaEvolution {
assertEquals(74.72, ((DoubleColumnVector) batch.cols[0]).vector[0], 0.00000000001);
rows.close();
}
+
+ @Test
+ public void testSafePpdEvaluation() throws IOException {
+ TypeDescription fileStruct1 = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+ SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
+ assertTrue(same1.isPPDSafeConversion(0));
+ assertFalse(same1.hasConversion());
+ TypeDescription readerStruct1 = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+ SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
+ assertFalse(both1.hasConversion());
+ assertTrue(both1.isPPDSafeConversion(0));
+ assertTrue(both1.isPPDSafeConversion(1));
+ assertTrue(both1.isPPDSafeConversion(2));
+ assertTrue(both1.isPPDSafeConversion(3));
+
+ // int -> long
+ TypeDescription readerStruct1diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+ SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
+ assertTrue(both1diff.hasConversion());
+ assertFalse(both1diff.isPPDSafeConversion(0));
+ assertTrue(both1diff.isPPDSafeConversion(1));
+ assertTrue(both1diff.isPPDSafeConversion(2));
+ assertTrue(both1diff.isPPDSafeConversion(3));
+
+ // decimal(38,10) -> decimal(12, 10)
+ TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(12).withScale(10));
+ SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1, readerStruct1diffPrecision,
+ new boolean[] {true, false, false, true});
+ assertTrue(both1diffPrecision.hasConversion());
+ assertFalse(both1diffPrecision.isPPDSafeConversion(0));
+ assertFalse(both1diffPrecision.isPPDSafeConversion(1)); // column not included
+ assertFalse(both1diffPrecision.isPPDSafeConversion(2)); // column not included
+ assertFalse(both1diffPrecision.isPPDSafeConversion(3));
+
+ // add columns
+ readerStruct1 = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10))
+ .addField("f4", TypeDescription.createBoolean());
+ both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
+ assertTrue(both1.hasConversion());
+ assertFalse(both1.isPPDSafeConversion(0));
+ assertTrue(both1.isPPDSafeConversion(1));
+ assertTrue(both1.isPPDSafeConversion(2));
+ assertTrue(both1.isPPDSafeConversion(3));
+ assertFalse(both1.isPPDSafeConversion(4));
+ }
+
+ @Test
+ public void testSafePpdEvaluationForInts() throws IOException {
+ // byte -> short -> int -> long
+ TypeDescription fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createByte());
+ SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertFalse(schemaEvolution.hasConversion());
+
+ // byte -> short
+ TypeDescription readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createShort());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // byte -> int
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // byte -> long
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // short -> int -> long
+ fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createShort());
+ schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertFalse(schemaEvolution.hasConversion());
+
+ // unsafe conversion short -> byte
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createByte());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // short -> int
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // short -> long
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // int -> long
+ fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt());
+ schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertFalse(schemaEvolution.hasConversion());
+
+ // unsafe conversion int -> byte
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createByte());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // unsafe conversion int -> short
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createShort());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // int -> long
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // long
+ fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong());
+ schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertTrue(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.hasConversion());
+
+ // unsafe conversion long -> byte
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createByte());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // unsafe conversion long -> short
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createShort());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // unsafe conversion long -> int
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // invalid
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createString());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // invalid
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createFloat());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // invalid
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createTimestamp());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+ }
+
+ @Test
+ public void testSafePpdEvaluationForStrings() throws IOException {
+ TypeDescription fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createString());
+ SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertTrue(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.hasConversion());
+
+ // string -> char
+ TypeDescription readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createChar());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // string -> varchar
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createVarchar());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createChar());
+ schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertTrue(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.hasConversion());
+
+ // char -> string
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createString());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // char -> varchar
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createVarchar());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createVarchar());
+ schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertTrue(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.hasConversion());
+
+ // varchar -> string
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createString());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // varchar -> char
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createChar());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // invalid
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createDecimal());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // invalid
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createDate());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // invalid
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/949eed2d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 63d02fb..0a2c3fa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -99,7 +99,6 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -1272,24 +1271,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
// We can't eliminate stripes if there are deltas because the
// deltas may change the rows making them match the predicate.
if ((deltas == null || deltas.isEmpty()) && context.sarg != null) {
- // Also, we currently do not use predicate evaluation when the schema has data type
- // conversion.
- if (evolution.hasConversion()) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(
- "Skipping split elimination for {} since the schema has data type conversion",
- file.getPath());
- }
+ String[] colNames =
+ extractNeededColNames((readerTypes == null ? fileTypes : readerTypes),
+ context.conf, readerIncluded, isOriginal);
+ if (colNames == null) {
+ LOG.warn("Skipping split elimination for {} as column names is null", file.getPath());
} else {
- String[] colNames =
- extractNeededColNames((readerTypes == null ? fileTypes : readerTypes),
- context.conf, readerIncluded, isOriginal);
- if (colNames == null) {
- LOG.warn("Skipping split elimination for {} as column names is null", file.getPath());
- } else {
- includeStripe = pickStripes(context.sarg, colNames, writerVersion, isOriginal,
- stripeStats, stripes.size(), file.getPath());
- }
+ includeStripe = pickStripes(context.sarg, colNames, writerVersion, isOriginal,
+ stripeStats, stripes.size(), file.getPath(), evolution);
}
}
return generateSplitsFromStripes(includeStripe);
@@ -1901,12 +1890,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
// eliminate stripes that don't satisfy the predicate condition
List<PredicateLeaf> sargLeaves = sarg.getLeaves();
int[] filterColumns = RecordReaderImpl.mapTranslatedSargColumns(types, sargLeaves);
- return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null);
+ TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0);
+ SchemaEvolution evolution = new SchemaEvolution(schema, null);
+ return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null, evolution);
}
private static boolean[] pickStripes(SearchArgument sarg, String[] sargColNames,
OrcFile.WriterVersion writerVersion, boolean isOriginal, List<StripeStatistics> stripeStats,
- int stripeCount, Path filePath) {
+ int stripeCount, Path filePath, final SchemaEvolution evolution) {
if (sarg == null || stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) {
return null; // only do split pruning if HIVE-8732 has been fixed in the writer
}
@@ -1914,15 +1905,16 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
List<PredicateLeaf> sargLeaves = sarg.getLeaves();
int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sargLeaves,
sargColNames, getRootColumn(isOriginal));
- return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath);
+ return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath, evolution);
}
private static boolean[] pickStripesInternal(SearchArgument sarg, int[] filterColumns,
- List<StripeStatistics> stripeStats, int stripeCount, Path filePath) {
+ List<StripeStatistics> stripeStats, int stripeCount, Path filePath,
+ final SchemaEvolution evolution) {
boolean[] includeStripe = new boolean[stripeCount];
for (int i = 0; i < includeStripe.length; ++i) {
includeStripe[i] = (i >= stripeStats.size()) ||
- isStripeSatisfyPredicate(stripeStats.get(i), sarg, filterColumns);
+ isStripeSatisfyPredicate(stripeStats.get(i), sarg, filterColumns, evolution);
if (isDebugEnabled && !includeStripe[i]) {
LOG.debug("Eliminating ORC stripe-" + i + " of file '" + filePath
+ "' as it did not satisfy predicate condition.");
@@ -1932,15 +1924,19 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
private static boolean isStripeSatisfyPredicate(
- StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns) {
+ StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns,
+ final SchemaEvolution evolution) {
List<PredicateLeaf> predLeaves = sarg.getLeaves();
TruthValue[] truthValues = new TruthValue[predLeaves.size()];
for (int pred = 0; pred < truthValues.length; pred++) {
if (filterColumns[pred] != -1) {
-
- // column statistics at index 0 contains only the number of rows
- ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
- truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
+ if (evolution != null && !evolution.isPPDSafeConversion(filterColumns[pred])) {
+ truthValues[pred] = TruthValue.YES_NO_NULL;
+ } else {
+ // column statistics at index 0 contains only the number of rows
+ ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
+ truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
+ }
} else {
// partition column case.
http://git-wip-us.apache.org/repos/asf/hive/blob/949eed2d/ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q b/ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q
new file mode 100644
index 0000000..88a94eb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q
@@ -0,0 +1,245 @@
+set hive.mapred.mode=nonstrict;
+SET hive.fetch.task.conversion=none;
+SET hive.cbo.enable=false;
+
+CREATE TABLE staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging;
+LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging;
+
+CREATE TABLE orc_ppd_staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*");
+
+insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s;
+
+-- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values
+-- which makes it hard to test bloom filters
+insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1;
+insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1;
+
+CREATE TABLE orc_ppd(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*");
+
+insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s;
+
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter;
+SET hive.optimize.index.filter=false;
+
+-- Row group statistics for column t:
+-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0
+-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11
+-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19
+
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+
+alter table orc_ppd change column t t smallint;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+
+alter table orc_ppd change column t t int;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+
+alter table orc_ppd change column t t bigint;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+
+alter table orc_ppd change column t t string;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > '127';
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > '127';
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = '55';
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = '55';
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = '54';
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = '54';
+
+SET hive.optimize.index.filter=false;
+-- float tests
+select count(*) from orc_ppd where f = 74.72;
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where f = 74.72;
+
+alter table orc_ppd change column f f double;
+
+SET hive.optimize.index.filter=false;
+select count(*) from orc_ppd where f = 74.72;
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where f = 74.72;
+
+alter table orc_ppd change column f f string;
+
+SET hive.optimize.index.filter=false;
+select count(*) from orc_ppd where f = '74.72';
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where f = '74.72';
+
+SET hive.optimize.index.filter=false;
+-- string tests
+select count(*) from orc_ppd where s = 'bob davidson';
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where s = 'bob davidson';
+
+alter table orc_ppd change column s s char(50);
+
+SET hive.optimize.index.filter=false;
+select count(*) from orc_ppd where s = 'bob davidson';
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where s = 'bob davidson';
+
+alter table orc_ppd change column s s varchar(50);
+
+SET hive.optimize.index.filter=false;
+select count(*) from orc_ppd where s = 'bob davidson';
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where s = 'bob davidson';
+
+alter table orc_ppd change column s s char(50);
+
+SET hive.optimize.index.filter=false;
+select count(*) from orc_ppd where s = 'bob davidson';
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where s = 'bob davidson';
+
+alter table orc_ppd change column s s string;
+
+SET hive.optimize.index.filter=false;
+select count(*) from orc_ppd where s = 'bob davidson';
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where s = 'bob davidson';
+
+alter table orc_ppd add columns (boo boolean);
+
+SET hive.optimize.index.filter=false;
+-- ppd on newly added column
+select count(*) from orc_ppd where si = 442;
+select count(*) from orc_ppd where si = 442 or boo is not null or boo = false;
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where si = 442;
+select count(*) from orc_ppd where si = 442 or boo is not null or boo = false;
http://git-wip-us.apache.org/repos/asf/hive/blob/949eed2d/ql/src/test/results/clientpositive/orc_ppd_schema_evol_3a.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_ppd_schema_evol_3a.q.out b/ql/src/test/results/clientpositive/orc_ppd_schema_evol_3a.q.out
new file mode 100644
index 0000000..494524e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/orc_ppd_schema_evol_3a.q.out
@@ -0,0 +1,544 @@
+PREHOOK: query: CREATE TABLE staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@staging
+POSTHOOK: query: CREATE TABLE staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@staging
+PREHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_ppd_staging
+POSTHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_ppd_staging
+PREHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_ppd_staging
+POSTHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_ppd_staging
+POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+PREHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values
+-- which makes it hard to test bloom filters
+insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_ppd_staging
+POSTHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values
+-- which makes it hard to test bloom filters
+insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_ppd_staging
+POSTHOOK: Lineage: orc_ppd_staging.b EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.i EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION []
+PREHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_ppd_staging
+POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_ppd_staging
+POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE []
+POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION []
+POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION []
+PREHOOK: query: CREATE TABLE orc_ppd(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_ppd
+POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ c char(50),
+ v varchar(50),
+ da date,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_ppd
+PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd_staging
+PREHOOK: Output: default@orc_ppd
+POSTHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_ppd_staging
+POSTHOOK: Output: default@orc_ppd
+POSTHOOK: Lineage: orc_ppd.b SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.c EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.d SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.da EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.dec SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: orc_ppd.f SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.i SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.s SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.si SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.t SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_ppd.v EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ]
+PREHOOK: query: -- Row group statistics for column t:
+-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0
+-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11
+-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19
+
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+0
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+0
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+8
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+8
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+18
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+18
+PREHOOK: query: alter table orc_ppd change column t t smallint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+0
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+0
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+8
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+8
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+18
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+18
+PREHOOK: query: alter table orc_ppd change column t t int
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+0
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+0
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+8
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+8
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+18
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+18
+PREHOOK: query: alter table orc_ppd change column t t bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+0
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+0
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+8
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+8
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+18
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+18
+PREHOOK: query: alter table orc_ppd change column t t string
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > '127'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+1566
+PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > '127'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+1566
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = '55'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+8
+PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = '55'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+8
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = '54'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+18
+PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = '54'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+18
+PREHOOK: query: -- float tests
+select count(*) from orc_ppd where f = 74.72
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+2
+PREHOOK: query: select count(*) from orc_ppd where f = 74.72
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+2
+PREHOOK: query: alter table orc_ppd change column f f double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select count(*) from orc_ppd where f = 74.72
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+2
+PREHOOK: query: select count(*) from orc_ppd where f = 74.72
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+2
+PREHOOK: query: alter table orc_ppd change column f f string
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select count(*) from orc_ppd where f = '74.72'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+2
+PREHOOK: query: select count(*) from orc_ppd where f = '74.72'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+2
+PREHOOK: query: -- string tests
+select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+6
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+6
+PREHOOK: query: alter table orc_ppd change column s s char(50)
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+6
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+6
+PREHOOK: query: alter table orc_ppd change column s s varchar(50)
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+6
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+6
+PREHOOK: query: alter table orc_ppd change column s s char(50)
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+6
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+6
+PREHOOK: query: alter table orc_ppd change column s s string
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+6
+PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+6
+PREHOOK: query: alter table orc_ppd add columns (boo boolean)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@orc_ppd
+PREHOOK: Output: default@orc_ppd
+PREHOOK: query: -- ppd on newly added column
+select count(*) from orc_ppd where si = 442
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+4
+PREHOOK: query: select count(*) from orc_ppd where si = 442 or boo is not null or boo = false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+4
+PREHOOK: query: select count(*) from orc_ppd where si = 442
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+4
+PREHOOK: query: select count(*) from orc_ppd where si = 442 or boo is not null or boo = false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd
+#### A masked pattern was here ####
+4