Posted to commits@hive.apache.org by xu...@apache.org on 2016/11/18 07:52:34 UTC
[1/4] hive git commit: HIVE-14815: Implement Parquet vectorization reader for Primitive types (Ferdinand Xu, review by Chao Sun) This closes #104
Repository: hive
Updated Branches:
refs/heads/master 67c022f05 -> 936df7a15
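The new q-test output below exercises the vectorized Parquet reader end to end: a delimited staging table is loaded, copied into a table STORED AS PARQUET, and then scanned with group-by aggregations whose map-side stages run in vectorized mode. A minimal sketch of the same flow, assuming vectorization is switched on through the standard hive.vectorized.execution.enabled setting (the table and column names simply mirror the output that follows):

-- Sketch only: enable vectorized execution for a Parquet-backed table.
SET hive.vectorized.execution.enabled=true;

CREATE TABLE parquet_types (
  cint int,
  ctinyint tinyint,
  csmallint smallint,
  cfloat float,
  cdouble double,
  cstring1 string
) STORED AS PARQUET;

-- With the vectorized Parquet reader in place, the map stage of this
-- aggregation reports "Execution mode: vectorized" in the EXPLAIN plans below.
EXPLAIN
SELECT ctinyint, MAX(cint), MIN(csmallint), COUNT(cstring1),
       ROUND(AVG(cfloat), 5), ROUND(STDDEV_POP(cdouble), 5)
FROM parquet_types
GROUP BY ctinyint
ORDER BY ctinyint;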
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/test/results/clientpositive/parquet_types_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_types_vectorization.q.out b/ql/src/test/results/clientpositive/parquet_types_vectorization.q.out
new file mode 100644
index 0000000..7818d73
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_types_vectorization.q.out
@@ -0,0 +1,850 @@
+PREHOOK: query: DROP TABLE parquet_types_staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types_staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE parquet_types
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE parquet_types_staging (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string,
+ t timestamp,
+ cchar char(5),
+ cvarchar varchar(10),
+ cbinary string,
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>,
+ d date
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_types_staging
+POSTHOOK: query: CREATE TABLE parquet_types_staging (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string,
+ t timestamp,
+ cchar char(5),
+ cvarchar varchar(10),
+ cbinary string,
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>,
+ d date
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: CREATE TABLE parquet_types (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string,
+ t timestamp,
+ cchar char(5),
+ cvarchar varchar(10),
+ cbinary binary,
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>,
+ d date
+) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_types
+POSTHOOK: query: CREATE TABLE parquet_types (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string,
+ t timestamp,
+ cchar char(5),
+ cvarchar varchar(10),
+ cbinary binary,
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>,
+ d date
+) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@parquet_types_staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: SELECT * FROM parquet_types_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types_staging
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_types_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types_staging
+#### A masked pattern was here ####
+100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111 a a B4F3CAFDBEDD {"k1":"v1"} [101,200] {"c1":10,"c2":"a"} 2011-01-01
+101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222 ab ab 68692CCAC0BDE7 {"k2":"v2"} [102,200] {"c1":10,"c2":"d"} 2012-02-02
+102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333 abc abc B4F3CAFDBEDD {"k3":"v3"} [103,200] {"c1":10,"c2":"g"} 2013-03-03
+103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444444444 abcd abcd 68692CCAC0BDE7 {"k4":"v4"} [104,200] {"c1":10,"c2":"j"} 2014-04-04
+104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555555555 abcde abcde B4F3CAFDBEDD {"k5":"v5"} [105,200] {"c1":10,"c2":"m"} 2015-05-05
+105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666666666 abcde abcdef 68692CCAC0BDE7 {"k6":"v6"} [106,200] {"c1":10,"c2":"p"} 2016-06-06
+106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777777777 abcde abcdefg B4F3CAFDBEDD {"k7":"v7"} [107,200] {"c1":10,"c2":"s"} 2017-07-07
+107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888888888 bcdef abcdefgh 68692CCAC0BDE7 {"k8":"v8"} [108,200] {"c1":10,"c2":"v"} 2018-08-08
+108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999999999 cdefg B4F3CAFDBE 68656C6C6F {"k9":"v9"} [109,200] {"c1":10,"c2":"y"} 2019-09-09
+109 1 5 1.4 2.7 bcd 2020-10-10 10:10:10.101010101 klmno abcdedef 68692CCAC0BDE7 {"k10":"v10"} [110,200] {"c1":10,"c2":"b"} 2020-10-10
+110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111111111 pqrst abcdede B4F3CAFDBEDD {"k11":"v11"} [111,200] {"c1":10,"c2":"e"} 2021-11-11
+111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121212121 nopqr abcded 68692CCAC0BDE7 {"k12":"v12"} [112,200] {"c1":10,"c2":"h"} 2022-12-12
+112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131313131 opqrs abcdd B4F3CAFDBEDD {"k13":"v13"} [113,200] {"c1":10,"c2":"k"} 2023-01-02
+113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141414141 pqrst abc 68692CCAC0BDE7 {"k14":"v14"} [114,200] {"c1":10,"c2":"n"} 2024-02-02
+114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151515151 qrstu b B4F3CAFDBEDD {"k15":"v15"} [115,200] {"c1":10,"c2":"q"} 2025-03-03
+115 1 1 1.0 4.5 qrs 2026-04-04 16:16:16.161616161 rstuv abcded 68692CCAC0BDE7 {"k16":"v16"} [116,200] {"c1":10,"c2":"q"} 2026-04-04
+116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171717171 stuvw abcded B4F3CAFDBEDD {"k17":"v17"} [117,200] {"c1":10,"c2":"w"} 2027-05-05
+117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181818181 tuvwx abcded 68692CCAC0BDE7 {"k18":"v18"} [118,200] {"c1":10,"c2":"z"} 2028-06-06
+118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191919191 uvwzy abcdede B4F3CAFDBEDD {"k19":"v19"} [119,200] {"c1":10,"c2":"c"} 2029-07-07
+119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede 68692CCAC0BDE7 {"k20":"v20"} [120,200] {"c1":10,"c2":"f"} 2030-08-08
+120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde B4F3CAFDBEDD {"k21":"v21"} [121,200] {"c1":10,"c2":"i"} 2031-09-09
+121 1 2 1.1 6.3 lmn 2032-10-10 22:22:22.222222222 bcdef abcde {"k22":"v22"} [122,200] {"c1":10,"c2":"l"} 2032-10-10
+PREHOOK: query: INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types_staging
+PREHOOK: Output: default@parquet_types
+POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types_staging
+POSTHOOK: Output: default@parquet_types
+POSTHOOK: Lineage: parquet_types.cbinary EXPRESSION [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cbinary, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cchar, type:char(5), comment:null), ]
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ]
+POSTHOOK: Lineage: parquet_types.d SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:d, type:date, comment:null), ]
+POSTHOOK: Lineage: parquet_types.l1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:l1, type:array<int>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.m1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:m1, type:map<string,varchar(3)>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.st1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:st1, type:struct<c1:int,c2:char(1)>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ]
+PREHOOK: query: -- test types in group by
+
+EXPLAIN SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ ROUND(AVG(cfloat), 5),
+ ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: -- test types in group by
+
+EXPLAIN SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ ROUND(AVG(cfloat), 5),
+ ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double)
+ outputColumnNames: ctinyint, cint, csmallint, cstring1, cfloat, cdouble
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
+ keys: ctinyint (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round(_col4, 5) (type: double), round(_col5, 5) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int), VALUE._col1 (type: smallint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ ROUND(AVG(cfloat), 5),
+ ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ ROUND(AVG(cfloat), 5),
+ ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+1 121 1 8 1.175 2.06216
+2 119 1 7 1.21429 1.8
+3 120 1 7 1.17143 1.8
+PREHOOK: query: EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cfloat (type: float)
+ outputColumnNames: cfloat
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: cfloat (type: float)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: float)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: float)
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: float)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: float)
+ sort order: +
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+1.0 5
+1.1 5
+1.2 4
+1.3 4
+1.4 4
+PREHOOK: query: EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cchar (type: char(5))
+ outputColumnNames: cchar
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: cchar (type: char(5))
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(5))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: char(5))
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: char(5))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: char(5))
+ sort order: +
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: char(5)), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+a 1
+ab 1
+abc 1
+abcd 1
+abcde 3
+bcdef 2
+cdefg 1
+klmno 1
+nopqr 1
+opqrs 1
+pqrst 2
+qrstu 1
+rstuv 1
+stuvw 1
+tuvwx 1
+uvwzy 1
+vwxyz 1
+wxyza 1
+PREHOOK: query: EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cvarchar (type: varchar(10))
+ outputColumnNames: cvarchar
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: cvarchar (type: varchar(10))
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: varchar(10))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: varchar(10))
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: varchar(10))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: varchar(10))
+ sort order: +
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+B4F3CAFDBE 1
+a 1
+ab 1
+abc 2
+abcd 1
+abcdd 1
+abcde 3
+abcded 4
+abcdede 3
+abcdedef 1
+abcdef 1
+abcdefg 1
+abcdefgh 1
+b 1
+PREHOOK: query: EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cstring1 (type: string)
+ outputColumnNames: cstring1
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: cstring1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+abc 1
+bcd 1
+cde 1
+def 1
+efg 1
+fgh 1
+ghi 1
+hij 1
+ijk 1
+jkl 1
+klm 1
+lmn 1
+mno 1
+nop 1
+pqr 1
+qrs 2
+stu 1
+vwx 1
+wxy 1
+yza 1
+zab 1
+PREHOOK: query: EXPLAIN SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: t (type: timestamp)
+ outputColumnNames: t
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: t (type: timestamp)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: timestamp)
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: timestamp)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp)
+ sort order: +
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+2011-01-01 01:01:01.111111111 1
+2012-02-02 02:02:02.222222222 1
+2013-03-03 03:03:03.333333333 1
+2014-04-04 04:04:04.444444444 1
+2015-05-05 05:05:05.555555555 1
+2016-06-06 06:06:06.666666666 1
+2017-07-07 07:07:07.777777777 1
+2018-08-08 08:08:08.888888888 1
+2019-09-09 09:09:09.999999999 1
+2020-10-10 10:10:10.101010101 1
+2021-11-11 11:11:11.111111111 1
+2022-12-12 12:12:12.121212121 1
+2023-01-02 13:13:13.131313131 1
+2024-02-02 14:14:14.141414141 1
+2025-03-03 15:15:15.151515151 1
+2026-04-04 16:16:16.161616161 1
+2027-05-05 17:17:17.171717171 1
+2028-06-06 18:18:18.181818181 1
+2029-07-07 19:19:19.191919191 1
+2030-08-08 20:20:20.202020202 1
+2031-09-09 21:21:21.212121212 1
+2032-10-10 22:22:22.222222222 1
+PREHOOK: query: EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbinary (type: binary)
+ outputColumnNames: cbinary
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: cbinary (type: binary)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: binary)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: binary)
+ Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: binary)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hex(_col0) (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+ 1
+68656C6C6F 1
+68692CCAC0BDE7 10
+B4F3CAFDBEDD 10
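The plans above show the effect of the change: every map-side stage scanning the Parquet table carries an "Execution mode: vectorized" marker, and the query results match the staged data. A quick way to confirm the same behaviour interactively (a sketch, assuming the parquet_types table from the output above) is to toggle vectorization and compare the plans:

-- Sketch: with the vectorized Parquet reader, the map stage is marked vectorized.
SET hive.vectorized.execution.enabled=true;
EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat;

-- Turning vectorization off falls back to the row-at-a-time reader and the
-- "Execution mode: vectorized" line disappears from the map stages.
SET hive.vectorized.execution.enabled=false;
EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat;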
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_parquet_types.q.out b/ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
index 281fe93..f493102 100644
--- a/ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
@@ -149,6 +149,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
@@ -216,6 +217,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
@@ -305,6 +307,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: tinyint)
Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: decimal(4,2))
+ Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5)
[2/4] hive git commit: HIVE-14815: Implement Parquet vectorization reader for Primitive types (Ferdinand Xu, review by Chao Sun) This closes #104
Posted by xu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/test/results/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q.out b/ql/src/test/results/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q.out
new file mode 100644
index 0000000..a9f5e48
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q.out
@@ -0,0 +1,2452 @@
+PREHOOK: query: DROP TABLE parquet_types_staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types_staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE parquet_types
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE parquet_types_staging (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string,
+ t timestamp,
+ cchar char(5),
+ cvarchar varchar(10),
+ cbinary string,
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>,
+ d date
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_types_staging
+POSTHOOK: query: CREATE TABLE parquet_types_staging (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string,
+ t timestamp,
+ cchar char(5),
+ cvarchar varchar(10),
+ cbinary string,
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>,
+ d date
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: CREATE TABLE parquet_types (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string,
+ t timestamp,
+ cchar char(5),
+ cvarchar varchar(10),
+ cbinary binary,
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>,
+ d date
+) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_types
+POSTHOOK: query: CREATE TABLE parquet_types (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string,
+ t timestamp,
+ cchar char(5),
+ cvarchar varchar(10),
+ cbinary binary,
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>,
+ d date
+) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_non_dictionary_types.txt' OVERWRITE INTO TABLE
+parquet_types_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@parquet_types_staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_non_dictionary_types.txt' OVERWRITE INTO TABLE
+parquet_types_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: SELECT * FROM parquet_types_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types_staging
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_types_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types_staging
+#### A masked pattern was here ####
+1000 -128 0 0.0 0.3 1940-01-01 01:01:01.111111111 {"":""} [1000,1001] {"c1":1000,"c2":"b"} 1940-01-01
+1001 -127 1 0.3 1.3 b 1941-02-02 01:01:01.111111111 b b b {"b":"b"} [1001,1002] {"c1":1001,"c2":"c"} 1941-02-02
+1002 -126 2 0.6 2.3 c 1942-03-03 01:01:01.111111111 c c c {"c":"c"} [1002,1003] {"c1":1002,"c2":"d"} 1942-03-03
+1003 -125 3 0.9 3.3 d 1943-04-04 01:01:01.111111111 d d d {"d":"d"} [1003,1004] {"c1":1003,"c2":"e"} 1943-04-04
+1004 -124 4 1.2 4.3 e 1944-05-05 01:01:01.111111111 e e e {"e":"e"} [1004,1005] {"c1":1004,"c2":"f"} 1944-05-05
+1005 -123 5 1.5 5.3 f 1945-06-06 01:01:01.111111111 f f f {"f":"f"} [1005,1006] {"c1":1005,"c2":"g"} 1945-06-06
+1006 -122 6 1.8 6.3 g 1946-07-07 01:01:01.111111111 g g g {"g":"g"} [1006,1007] {"c1":1006,"c2":"h"} 1946-07-07
+1007 -121 7 2.1 7.3 h 1947-08-08 01:01:01.111111111 h h h {"h":"h"} [1007,1008] {"c1":1007,"c2":"i"} 1947-08-08
+1008 -120 8 2.4 8.3 i 1948-09-09 01:01:01.111111111 i i i {"i":"i"} [1008,1009] {"c1":1008,"c2":"j"} 1948-09-09
+1009 -119 9 2.7 9.3 j 1949-10-10 01:01:01.111111111 j j j {"j":"j"} [1009,1010] {"c1":1009,"c2":"k"} 1949-10-10
+1010 -118 10 3.0 10.3 k 1950-11-11 01:01:01.111111111 k k k {"k":"k"} [1010,1011] {"c1":1010,"c2":"l"} 1950-11-11
+1011 -117 11 3.3 11.3 l 1951-12-12 01:01:01.111111111 l l l {"l":"l"} [1011,1012] {"c1":1011,"c2":"m"} 1951-12-12
+1012 -116 12 3.6 12.3 m 1952-01-13 01:01:01.111111111 m m m {"m":"m"} [1012,1013] {"c1":1012,"c2":"n"} 1952-01-13
+1013 -115 13 3.9 13.3 n 1953-02-14 01:01:01.111111111 n n n {"n":"n"} [1013,1014] {"c1":1013,"c2":"o"} 1953-02-14
+1014 -114 14 4.2 14.3 o 1954-03-15 01:01:01.111111111 o o o {"o":"o"} [1014,1015] {"c1":1014,"c2":"p"} 1954-03-15
+1015 -113 15 4.5 15.3 p 1955-04-16 01:01:01.111111111 p p p {"p":"p"} [1015,1016] {"c1":1015,"c2":"q"} 1955-04-16
+1016 -112 16 4.8 16.3 q 1956-05-17 01:01:01.111111111 q q q {"q":"q"} [1016,1017] {"c1":1016,"c2":"r"} 1956-05-17
+1017 -111 17 5.1 17.3 r 1957-06-18 01:01:01.111111111 r r r {"r":"r"} [1017,1018] {"c1":1017,"c2":"s"} 1957-06-18
+1018 -110 18 5.4 18.3 s 1958-07-19 01:01:01.111111111 s s s {"s":"s"} [1018,1019] {"c1":1018,"c2":"t"} 1958-07-19
+1019 -109 19 5.7 19.3 t 1959-08-20 01:01:01.111111111 t t t {"t":"t"} [1019,1020] {"c1":1019,"c2":"u"} 1959-08-20
+1020 -108 20 6.0 20.3 u 1960-09-21 01:01:01.111111111 u u u {"u":"u"} [1020,1021] {"c1":1020,"c2":"v"} 1960-09-21
+1021 -107 21 6.3 21.3 v 1961-10-22 01:01:01.111111111 v v v {"v":"v"} [1021,1022] {"c1":1021,"c2":"w"} 1961-10-22
+1022 -106 22 6.6 22.3 w 1962-11-23 01:01:01.111111111 w w w {"w":"w"} [1022,1023] {"c1":1022,"c2":"x"} 1962-11-23
+1023 -105 23 6.9 23.3 x 1963-12-24 01:01:01.111111111 x x x {"x":"x"} [1023,1024] {"c1":1023,"c2":"y"} 1963-12-24
+1024 -104 24 7.2 24.3 y 1964-01-25 01:01:01.111111111 y y y {"y":"y"} [1024,1025] {"c1":1024,"c2":"z"} 1964-01-25
+1025 -103 25 7.5 25.3 z 1965-02-26 01:01:01.111111111 z z z {"z":"z"} [1025,1026] {"c1":1025,"c2":"b"} 1965-02-26
+1026 -102 26 7.8 26.3 ba 1966-03-27 01:01:01.111111111 ba ba ba {"ba":"ba"} [1026,1027] {"c1":1026,"c2":"b"} 1966-03-27
+1027 -101 27 8.1 27.3 bb 1967-04-01 01:01:01.111111111 bb bb bb {"bb":"bb"} [1027,1028] {"c1":1027,"c2":"b"} 1967-04-01
+1028 -100 28 8.4 28.3 bc 1968-05-02 01:01:01.111111111 bc bc bc {"bc":"bc"} [1028,1029] {"c1":1028,"c2":"b"} 1968-05-02
+1029 -99 29 8.7 29.3 bd 1969-06-03 01:01:01.111111111 bd bd bd {"bd":"bd"} [1029,1030] {"c1":1029,"c2":"b"} 1969-06-03
+1030 -98 30 9.0 30.3 be 1970-07-04 01:01:01.111111111 be be be {"be":"be"} [1030,1031] {"c1":1030,"c2":"b"} 1970-07-04
+1031 -97 31 9.3 31.3 bf 1971-08-05 01:01:01.111111111 bf bf bf {"bf":"bf"} [1031,1032] {"c1":1031,"c2":"b"} 1971-08-05
+1032 -96 32 9.6 32.3 bg 1972-09-06 01:01:01.111111111 bg bg bg {"bg":"bg"} [1032,1033] {"c1":1032,"c2":"b"} 1972-09-06
+1033 -95 33 9.9 33.3 bh 1973-10-07 01:01:01.111111111 bh bh bh {"bh":"bh"} [1033,1034] {"c1":1033,"c2":"b"} 1973-10-07
+1034 -94 34 10.2 34.3 bi 1974-11-08 01:01:01.111111111 bi bi bi {"bi":"bi"} [1034,1035] {"c1":1034,"c2":"b"} 1974-11-08
+1035 -93 35 10.5 35.3 bj 1975-12-09 01:01:01.111111111 bj bj bj {"bj":"bj"} [1035,1036] {"c1":1035,"c2":"b"} 1975-12-09
+1036 -92 36 10.8 36.3 bk 1976-01-10 01:01:01.111111111 bk bk bk {"bk":"bk"} [1036,1037] {"c1":1036,"c2":"b"} 1976-01-10
+1037 -91 37 11.1 37.3 bl 1977-02-11 01:01:01.111111111 bl bl bl {"bl":"bl"} [1037,1038] {"c1":1037,"c2":"b"} 1977-02-11
+1038 -90 38 11.4 38.3 bm 1978-03-12 01:01:01.111111111 bm bm bm {"bm":"bm"} [1038,1039] {"c1":1038,"c2":"b"} 1978-03-12
+1039 -89 39 11.7 39.3 bn 1979-04-13 01:01:01.111111111 bn bn bn {"bn":"bn"} [1039,1040] {"c1":1039,"c2":"b"} 1979-04-13
+1040 -88 40 12.0 40.3 bo 1980-05-14 01:01:01.111111111 bo bo bo {"bo":"bo"} [1040,1041] {"c1":1040,"c2":"b"} 1980-05-14
+1041 -87 41 12.3 41.3 bp 1981-06-15 01:01:01.111111111 bp bp bp {"bp":"bp"} [1041,1042] {"c1":1041,"c2":"b"} 1981-06-15
+1042 -86 42 12.6 42.3 bq 1982-07-16 01:01:01.111111111 bq bq bq {"bq":"bq"} [1042,1043] {"c1":1042,"c2":"b"} 1982-07-16
+1043 -85 43 12.9 43.3 br 1983-08-17 01:01:01.111111111 br br br {"br":"br"} [1043,1044] {"c1":1043,"c2":"b"} 1983-08-17
+1044 -84 44 13.2 44.3 bs 1984-09-18 01:01:01.111111111 bs bs bs {"bs":"bs"} [1044,1045] {"c1":1044,"c2":"b"} 1984-09-18
+1045 -83 45 13.5 45.3 bt 1985-10-19 01:01:01.111111111 bt bt bt {"bt":"bt"} [1045,1046] {"c1":1045,"c2":"b"} 1985-10-19
+1046 -82 46 13.8 46.3 bu 1986-11-20 01:01:01.111111111 bu bu bu {"bu":"bu"} [1046,1047] {"c1":1046,"c2":"b"} 1986-11-20
+1047 -81 47 14.1 47.3 bv 1987-12-21 01:01:01.111111111 bv bv bv {"bv":"bv"} [1047,1048] {"c1":1047,"c2":"b"} 1987-12-21
+1048 -80 48 14.4 48.3 bw 1988-01-22 01:01:01.111111111 bw bw bw {"bw":"bw"} [1048,1049] {"c1":1048,"c2":"b"} 1988-01-22
+1049 -79 49 14.7 49.3 bx 1989-02-23 01:01:01.111111111 bx bx bx {"bx":"bx"} [1049,1050] {"c1":1049,"c2":"b"} 1989-02-23
+1050 -78 50 15.0 50.3 by 1990-03-24 01:01:01.111111111 by by by {"by":"by"} [1050,1051] {"c1":1050,"c2":"b"} 1990-03-24
+1051 -77 51 15.3 51.3 bz 1991-04-25 01:01:01.111111111 bz bz bz {"bz":"bz"} [1051,1052] {"c1":1051,"c2":"c"} 1991-04-25
+1052 -76 52 15.6 52.3 ca 1992-05-26 01:01:01.111111111 ca ca ca {"ca":"ca"} [1052,1053] {"c1":1052,"c2":"c"} 1992-05-26
+1053 -75 53 15.9 53.3 cb 1993-06-27 01:01:01.111111111 cb cb cb {"cb":"cb"} [1053,1054] {"c1":1053,"c2":"c"} 1993-06-27
+1054 -74 54 16.2 54.3 cc 1994-07-01 01:01:01.111111111 cc cc cc {"cc":"cc"} [1054,1055] {"c1":1054,"c2":"c"} 1994-07-01
+1055 -73 55 16.5 55.3 cd 1995-08-02 01:01:01.111111111 cd cd cd {"cd":"cd"} [1055,1056] {"c1":1055,"c2":"c"} 1995-08-02
+1056 -72 56 16.8 56.3 ce 1996-09-03 01:01:01.111111111 ce ce ce {"ce":"ce"} [1056,1057] {"c1":1056,"c2":"c"} 1996-09-03
+1057 -71 57 17.1 57.3 cf 1997-10-04 01:01:01.111111111 cf cf cf {"cf":"cf"} [1057,1058] {"c1":1057,"c2":"c"} 1997-10-04
+1058 -70 58 17.4 58.3 cg 1998-11-05 01:01:01.111111111 cg cg cg {"cg":"cg"} [1058,1059] {"c1":1058,"c2":"c"} 1998-11-05
+1059 -69 59 17.7 59.3 ch 1999-12-06 01:01:01.111111111 ch ch ch {"ch":"ch"} [1059,1060] {"c1":1059,"c2":"c"} 1999-12-06
+1060 -68 60 18.0 60.3 ci 2000-01-07 01:01:01.111111111 ci ci ci {"ci":"ci"} [1060,1061] {"c1":1060,"c2":"c"} 2000-01-07
+1061 -67 61 18.3 61.3 cj 2001-02-08 01:01:01.111111111 cj cj cj {"cj":"cj"} [1061,1062] {"c1":1061,"c2":"c"} 2001-02-08
+1062 -66 62 18.6 62.3 ck 2002-03-09 01:01:01.111111111 ck ck ck {"ck":"ck"} [1062,1063] {"c1":1062,"c2":"c"} 2002-03-09
+1063 -65 63 18.9 63.3 cl 2003-04-10 01:01:01.111111111 cl cl cl {"cl":"cl"} [1063,1064] {"c1":1063,"c2":"c"} 2003-04-10
+1064 -64 64 19.2 64.3 cm 2004-05-11 01:01:01.111111111 cm cm cm {"cm":"cm"} [1064,1065] {"c1":1064,"c2":"c"} 2004-05-11
+1065 -63 65 19.5 65.3 cn 2005-06-12 01:01:01.111111111 cn cn cn {"cn":"cn"} [1065,1066] {"c1":1065,"c2":"c"} 2005-06-12
+1066 -62 66 19.8 66.3 co 2006-07-13 01:01:01.111111111 co co co {"co":"co"} [1066,1067] {"c1":1066,"c2":"c"} 2006-07-13
+1067 -61 67 20.1 67.3 cp 2007-08-14 01:01:01.111111111 cp cp cp {"cp":"cp"} [1067,1068] {"c1":1067,"c2":"c"} 2007-08-14
+1068 -60 68 20.4 68.3 cq 2008-09-15 01:01:01.111111111 cq cq cq {"cq":"cq"} [1068,1069] {"c1":1068,"c2":"c"} 2008-09-15
+1069 -59 69 20.7 69.3 cr 2009-10-16 01:01:01.111111111 cr cr cr {"cr":"cr"} [1069,1070] {"c1":1069,"c2":"c"} 2009-10-16
+1070 -58 70 21.0 70.3 cs 2010-11-17 01:01:01.111111111 cs cs cs {"cs":"cs"} [1070,1071] {"c1":1070,"c2":"c"} 2010-11-17
+1071 -57 71 21.3 71.3 ct 2011-12-18 01:01:01.111111111 ct ct ct {"ct":"ct"} [1071,1072] {"c1":1071,"c2":"c"} 2011-12-18
+1072 -56 72 21.6 72.3 cu 2012-01-19 01:01:01.111111111 cu cu cu {"cu":"cu"} [1072,1073] {"c1":1072,"c2":"c"} 2012-01-19
+1073 -55 73 21.9 73.3 cv 2013-02-20 01:01:01.111111111 cv cv cv {"cv":"cv"} [1073,1074] {"c1":1073,"c2":"c"} 2013-02-20
+1074 -54 74 22.2 74.3 cw 2014-03-21 01:01:01.111111111 cw cw cw {"cw":"cw"} [1074,1075] {"c1":1074,"c2":"c"} 2014-03-21
+1075 -53 75 22.5 75.3 cx 2015-04-22 01:01:01.111111111 cx cx cx {"cx":"cx"} [1075,1076] {"c1":1075,"c2":"c"} 2015-04-22
+1076 -52 76 22.8 76.3 cy 2016-05-23 01:01:01.111111111 cy cy cy {"cy":"cy"} [1076,1077] {"c1":1076,"c2":"c"} 2016-05-23
+1077 -51 77 23.1 77.3 cz 2017-06-24 01:01:01.111111111 cz cz cz {"cz":"cz"} [1077,1078] {"c1":1077,"c2":"d"} 2017-06-24
+1078 -50 78 23.4 78.3 da 2018-07-25 01:01:01.111111111 da da da {"da":"da"} [1078,1079] {"c1":1078,"c2":"d"} 2018-07-25
+1079 -49 79 23.7 79.3 db 2019-08-26 01:01:01.111111111 db db db {"db":"db"} [1079,1080] {"c1":1079,"c2":"d"} 2019-08-26
+1080 -48 80 24.0 80.3 dc 2020-09-27 01:01:01.111111111 dc dc dc {"dc":"dc"} [1080,1081] {"c1":1080,"c2":"d"} 2020-09-27
+1081 -47 81 24.3 81.3 dd 2021-10-01 01:01:01.111111111 dd dd dd {"dd":"dd"} [1081,1082] {"c1":1081,"c2":"d"} 2021-10-01
+1082 -46 82 24.6 82.3 de 2022-11-02 01:01:01.111111111 de de de {"de":"de"} [1082,1083] {"c1":1082,"c2":"d"} 2022-11-02
+1083 -45 83 24.9 83.3 df 2023-12-03 01:01:01.111111111 df df df {"df":"df"} [1083,1084] {"c1":1083,"c2":"d"} 2023-12-03
+1084 -44 84 25.2 84.3 dg 2024-01-04 01:01:01.111111111 dg dg dg {"dg":"dg"} [1084,1085] {"c1":1084,"c2":"d"} 2024-01-04
+1085 -43 85 25.5 85.3 dh 2025-02-05 01:01:01.111111111 dh dh dh {"dh":"dh"} [1085,1086] {"c1":1085,"c2":"d"} 2025-02-05
+1086 -42 86 25.8 86.3 di 2026-03-06 01:01:01.111111111 di di di {"di":"di"} [1086,1087] {"c1":1086,"c2":"d"} 2026-03-06
+1087 -41 87 26.1 87.3 dj 2027-04-07 01:01:01.111111111 dj dj dj {"dj":"dj"} [1087,1088] {"c1":1087,"c2":"d"} 2027-04-07
+1088 -40 88 26.4 88.3 dk 2028-05-08 01:01:01.111111111 dk dk dk {"dk":"dk"} [1088,1089] {"c1":1088,"c2":"d"} 2028-05-08
+1089 -39 89 26.7 89.3 dl 2029-06-09 01:01:01.111111111 dl dl dl {"dl":"dl"} [1089,1090] {"c1":1089,"c2":"d"} 2029-06-09
+1090 -38 90 27.0 90.3 dm 2030-07-10 01:01:01.111111111 dm dm dm {"dm":"dm"} [1090,1091] {"c1":1090,"c2":"d"} 2030-07-10
+1091 -37 91 27.3 91.3 dn 2031-08-11 01:01:01.111111111 dn dn dn {"dn":"dn"} [1091,1092] {"c1":1091,"c2":"d"} 2031-08-11
+1092 -36 92 27.6 92.3 do 2032-09-12 01:01:01.111111111 do do do {"do":"do"} [1092,1093] {"c1":1092,"c2":"d"} 2032-09-12
+1093 -35 93 27.9 93.3 dp 2033-10-13 01:01:01.111111111 dp dp dp {"dp":"dp"} [1093,1094] {"c1":1093,"c2":"d"} 2033-10-13
+1094 -34 94 28.2 94.3 dq 2034-11-14 01:01:01.111111111 dq dq dq {"dq":"dq"} [1094,1095] {"c1":1094,"c2":"d"} 2034-11-14
+1095 -33 95 28.5 95.3 dr 2035-12-15 01:01:01.111111111 dr dr dr {"dr":"dr"} [1095,1096] {"c1":1095,"c2":"d"} 2035-12-15
+1096 -32 96 28.8 96.3 ds 2036-01-16 01:01:01.111111111 ds ds ds {"ds":"ds"} [1096,1097] {"c1":1096,"c2":"d"} 2036-01-16
+1097 -31 97 29.1 97.3 dt 2037-02-17 01:01:01.111111111 dt dt dt {"dt":"dt"} [1097,1098] {"c1":1097,"c2":"d"} 2037-02-17
+1098 -30 98 29.4 98.3 du 2038-03-18 01:01:01.111111111 du du du {"du":"du"} [1098,1099] {"c1":1098,"c2":"d"} 2038-03-18
+1099 -29 99 29.7 99.3 dv 2039-04-19 01:01:01.111111111 dv dv dv {"dv":"dv"} [1099,1100] {"c1":1099,"c2":"d"} 2039-04-19
+1100 -28 100 30.0 100.3 dw 2040-05-20 01:01:01.111111111 dw dw dw {"dw":"dw"} [1100,1101] {"c1":1100,"c2":"d"} 2040-05-20
+1101 -27 101 30.3 101.3 dx 2041-06-21 01:01:01.111111111 dx dx dx {"dx":"dx"} [1101,1102] {"c1":1101,"c2":"d"} 2041-06-21
+1102 -26 102 30.6 102.3 dy 2042-07-22 01:01:01.111111111 dy dy dy {"dy":"dy"} [1102,1103] {"c1":1102,"c2":"d"} 2042-07-22
+1103 -25 103 30.9 103.3 dz 2043-08-23 01:01:01.111111111 dz dz dz {"dz":"dz"} [1103,1104] {"c1":1103,"c2":"e"} 2043-08-23
+1104 -24 104 31.2 104.3 ea 2044-09-24 01:01:01.111111111 ea ea ea {"ea":"ea"} [1104,1105] {"c1":1104,"c2":"e"} 2044-09-24
+1105 -23 105 31.5 105.3 eb 2045-10-25 01:01:01.111111111 eb eb eb {"eb":"eb"} [1105,1106] {"c1":1105,"c2":"e"} 2045-10-25
+1106 -22 106 31.8 106.3 ec 2046-11-26 01:01:01.111111111 ec ec ec {"ec":"ec"} [1106,1107] {"c1":1106,"c2":"e"} 2046-11-26
+1107 -21 107 32.1 107.3 ed 2047-12-27 01:01:01.111111111 ed ed ed {"ed":"ed"} [1107,1108] {"c1":1107,"c2":"e"} 2047-12-27
+1108 -20 108 32.4 108.3 ee 2048-01-01 01:01:01.111111111 ee ee ee {"ee":"ee"} [1108,1109] {"c1":1108,"c2":"e"} 2048-01-01
+1109 -19 109 32.7 109.3 ef 2049-02-02 01:01:01.111111111 ef ef ef {"ef":"ef"} [1109,1110] {"c1":1109,"c2":"e"} 2049-02-02
+1110 -18 110 33.0 110.3 eg 2050-03-03 01:01:01.111111111 eg eg eg {"eg":"eg"} [1110,1111] {"c1":1110,"c2":"e"} 2050-03-03
+1111 -17 111 33.3 111.3 eh 2051-04-04 01:01:01.111111111 eh eh eh {"eh":"eh"} [1111,1112] {"c1":1111,"c2":"e"} 2051-04-04
+1112 -16 112 33.6 112.3 ei 2052-05-05 01:01:01.111111111 ei ei ei {"ei":"ei"} [1112,1113] {"c1":1112,"c2":"e"} 2052-05-05
+1113 -15 113 33.9 113.3 ej 2053-06-06 01:01:01.111111111 ej ej ej {"ej":"ej"} [1113,1114] {"c1":1113,"c2":"e"} 2053-06-06
+1114 -14 114 34.2 114.3 ek 2054-07-07 01:01:01.111111111 ek ek ek {"ek":"ek"} [1114,1115] {"c1":1114,"c2":"e"} 2054-07-07
+1115 -13 115 34.5 115.3 el 2055-08-08 01:01:01.111111111 el el el {"el":"el"} [1115,1116] {"c1":1115,"c2":"e"} 2055-08-08
+1116 -12 116 34.8 116.3 em 2056-09-09 01:01:01.111111111 em em em {"em":"em"} [1116,1117] {"c1":1116,"c2":"e"} 2056-09-09
+1117 -11 117 35.1 117.3 en 2057-10-10 01:01:01.111111111 en en en {"en":"en"} [1117,1118] {"c1":1117,"c2":"e"} 2057-10-10
+1118 -10 118 35.4 118.3 eo 2058-11-11 01:01:01.111111111 eo eo eo {"eo":"eo"} [1118,1119] {"c1":1118,"c2":"e"} 2058-11-11
+1119 -9 119 35.7 119.3 ep 2059-12-12 01:01:01.111111111 ep ep ep {"ep":"ep"} [1119,1120] {"c1":1119,"c2":"e"} 2059-12-12
+1120 -8 120 36.0 120.3 eq 2060-01-13 01:01:01.111111111 eq eq eq {"eq":"eq"} [1120,1121] {"c1":1120,"c2":"e"} 2060-01-13
+1121 -7 121 36.3 121.3 er 2061-02-14 01:01:01.111111111 er er er {"er":"er"} [1121,1122] {"c1":1121,"c2":"e"} 2061-02-14
+1122 -6 122 36.6 122.3 es 2062-03-15 01:01:01.111111111 es es es {"es":"es"} [1122,1123] {"c1":1122,"c2":"e"} 2062-03-15
+1123 -5 123 36.9 123.3 et 2063-04-16 01:01:01.111111111 et et et {"et":"et"} [1123,1124] {"c1":1123,"c2":"e"} 2063-04-16
+1124 -4 124 37.2 124.3 eu 2064-05-17 01:01:01.111111111 eu eu eu {"eu":"eu"} [1124,1125] {"c1":1124,"c2":"e"} 2064-05-17
+1125 -3 125 37.5 125.3 ev 2065-06-18 01:01:01.111111111 ev ev ev {"ev":"ev"} [1125,1126] {"c1":1125,"c2":"e"} 2065-06-18
+1126 -2 126 37.8 126.3 ew 2066-07-19 01:01:01.111111111 ew ew ew {"ew":"ew"} [1126,1127] {"c1":1126,"c2":"e"} 2066-07-19
+1127 -1 127 38.1 127.3 ex 2067-08-20 01:01:01.111111111 ex ex ex {"ex":"ex"} [1127,1128] {"c1":1127,"c2":"e"} 2067-08-20
+1128 0 128 38.4 128.3 ey 2068-09-21 01:01:01.111111111 ey ey ey {"ey":"ey"} [1128,1129] {"c1":1128,"c2":"e"} 2068-09-21
+1129 1 129 38.7 129.3 ez 2069-10-22 01:01:01.111111111 ez ez ez {"ez":"ez"} [1129,1130] {"c1":1129,"c2":"f"} 2069-10-22
+1130 2 130 39.0 130.3 fa 2070-11-23 01:01:01.111111111 fa fa fa {"fa":"fa"} [1130,1131] {"c1":1130,"c2":"f"} 2070-11-23
+1131 3 131 39.3 131.3 fb 2071-12-24 01:01:01.111111111 fb fb fb {"fb":"fb"} [1131,1132] {"c1":1131,"c2":"f"} 2071-12-24
+1132 4 132 39.6 132.3 fc 2072-01-25 01:01:01.111111111 fc fc fc {"fc":"fc"} [1132,1133] {"c1":1132,"c2":"f"} 2072-01-25
+1133 5 133 39.9 133.3 fd 2073-02-26 01:01:01.111111111 fd fd fd {"fd":"fd"} [1133,1134] {"c1":1133,"c2":"f"} 2073-02-26
+1134 6 134 40.2 134.3 fe 2074-03-27 01:01:01.111111111 fe fe fe {"fe":"fe"} [1134,1135] {"c1":1134,"c2":"f"} 2074-03-27
+1135 7 135 40.5 135.3 ff 2075-04-01 01:01:01.111111111 ff ff ff {"ff":"ff"} [1135,1136] {"c1":1135,"c2":"f"} 2075-04-01
+1136 8 136 40.8 136.3 fg 2076-05-02 01:01:01.111111111 fg fg fg {"fg":"fg"} [1136,1137] {"c1":1136,"c2":"f"} 2076-05-02
+1137 9 137 41.1 137.3 fh 2077-06-03 01:01:01.111111111 fh fh fh {"fh":"fh"} [1137,1138] {"c1":1137,"c2":"f"} 2077-06-03
+1138 10 138 41.4 138.3 fi 2078-07-04 01:01:01.111111111 fi fi fi {"fi":"fi"} [1138,1139] {"c1":1138,"c2":"f"} 2078-07-04
+1139 11 139 41.7 139.3 fj 2079-08-05 01:01:01.111111111 fj fj fj {"fj":"fj"} [1139,1140] {"c1":1139,"c2":"f"} 2079-08-05
+1140 12 140 42.0 140.3 fk 2080-09-06 01:01:01.111111111 fk fk fk {"fk":"fk"} [1140,1141] {"c1":1140,"c2":"f"} 2080-09-06
+1141 13 141 42.3 141.3 fl 2081-10-07 01:01:01.111111111 fl fl fl {"fl":"fl"} [1141,1142] {"c1":1141,"c2":"f"} 2081-10-07
+1142 14 142 42.6 142.3 fm 2082-11-08 01:01:01.111111111 fm fm fm {"fm":"fm"} [1142,1143] {"c1":1142,"c2":"f"} 2082-11-08
+1143 15 143 42.9 143.3 fn 2083-12-09 01:01:01.111111111 fn fn fn {"fn":"fn"} [1143,1144] {"c1":1143,"c2":"f"} 2083-12-09
+1144 16 144 43.2 144.3 fo 2084-01-10 01:01:01.111111111 fo fo fo {"fo":"fo"} [1144,1145] {"c1":1144,"c2":"f"} 2084-01-10
+1145 17 145 43.5 145.3 fp 2085-02-11 01:01:01.111111111 fp fp fp {"fp":"fp"} [1145,1146] {"c1":1145,"c2":"f"} 2085-02-11
+1146 18 146 43.8 146.3 fq 2086-03-12 01:01:01.111111111 fq fq fq {"fq":"fq"} [1146,1147] {"c1":1146,"c2":"f"} 2086-03-12
+1147 19 147 44.1 147.3 fr 2087-04-13 01:01:01.111111111 fr fr fr {"fr":"fr"} [1147,1148] {"c1":1147,"c2":"f"} 2087-04-13
+1148 20 148 44.4 148.3 fs 2088-05-14 01:01:01.111111111 fs fs fs {"fs":"fs"} [1148,1149] {"c1":1148,"c2":"f"} 2088-05-14
+1149 21 149 44.7 149.3 ft 2089-06-15 01:01:01.111111111 ft ft ft {"ft":"ft"} [1149,1150] {"c1":1149,"c2":"f"} 2089-06-15
+1150 22 150 45.0 150.3 fu 2090-07-16 01:01:01.111111111 fu fu fu {"fu":"fu"} [1150,1151] {"c1":1150,"c2":"f"} 2090-07-16
+1151 23 151 45.3 151.3 fv 2091-08-17 01:01:01.111111111 fv fv fv {"fv":"fv"} [1151,1152] {"c1":1151,"c2":"f"} 2091-08-17
+1152 24 152 45.6 152.3 fw 2092-09-18 01:01:01.111111111 fw fw fw {"fw":"fw"} [1152,1153] {"c1":1152,"c2":"f"} 2092-09-18
+1153 25 153 45.9 153.3 fx 2093-10-19 01:01:01.111111111 fx fx fx {"fx":"fx"} [1153,1154] {"c1":1153,"c2":"f"} 2093-10-19
+1154 26 154 46.2 154.3 fy 2094-11-20 01:01:01.111111111 fy fy fy {"fy":"fy"} [1154,1155] {"c1":1154,"c2":"f"} 2094-11-20
+1155 27 155 46.5 155.3 fz 2095-12-21 01:01:01.111111111 fz fz fz {"fz":"fz"} [1155,1156] {"c1":1155,"c2":"g"} 2095-12-21
+1156 28 156 46.8 156.3 ga 2096-01-22 01:01:01.111111111 ga ga ga {"ga":"ga"} [1156,1157] {"c1":1156,"c2":"g"} 2096-01-22
+1157 29 157 47.1 157.3 gb 2097-02-23 01:01:01.111111111 gb gb gb {"gb":"gb"} [1157,1158] {"c1":1157,"c2":"g"} 2097-02-23
+1158 30 158 47.4 158.3 gc 2098-03-24 01:01:01.111111111 gc gc gc {"gc":"gc"} [1158,1159] {"c1":1158,"c2":"g"} 2098-03-24
+1159 31 159 47.7 159.3 gd 2099-04-25 01:01:01.111111111 gd gd gd {"gd":"gd"} [1159,1160] {"c1":1159,"c2":"g"} 2099-04-25
+1160 32 160 48.0 160.3 ge 2100-05-26 01:01:01.111111111 ge ge ge {"ge":"ge"} [1160,1161] {"c1":1160,"c2":"g"} 2100-05-26
+1161 33 161 48.3 161.3 gf 2101-06-27 01:01:01.111111111 gf gf gf {"gf":"gf"} [1161,1162] {"c1":1161,"c2":"g"} 2101-06-27
+1162 34 162 48.6 162.3 gg 2102-07-01 01:01:01.111111111 gg gg gg {"gg":"gg"} [1162,1163] {"c1":1162,"c2":"g"} 2102-07-01
+1163 35 163 48.9 163.3 gh 2103-08-02 01:01:01.111111111 gh gh gh {"gh":"gh"} [1163,1164] {"c1":1163,"c2":"g"} 2103-08-02
+1164 36 164 49.2 164.3 gi 2104-09-03 01:01:01.111111111 gi gi gi {"gi":"gi"} [1164,1165] {"c1":1164,"c2":"g"} 2104-09-03
+1165 37 165 49.5 165.3 gj 2105-10-04 01:01:01.111111111 gj gj gj {"gj":"gj"} [1165,1166] {"c1":1165,"c2":"g"} 2105-10-04
+1166 38 166 49.8 166.3 gk 2106-11-05 01:01:01.111111111 gk gk gk {"gk":"gk"} [1166,1167] {"c1":1166,"c2":"g"} 2106-11-05
+1167 39 167 50.1 167.3 gl 2107-12-06 01:01:01.111111111 gl gl gl {"gl":"gl"} [1167,1168] {"c1":1167,"c2":"g"} 2107-12-06
+1168 40 168 50.4 168.3 gm 2108-01-07 01:01:01.111111111 gm gm gm {"gm":"gm"} [1168,1169] {"c1":1168,"c2":"g"} 2108-01-07
+1169 41 169 50.7 169.3 gn 2109-02-08 01:01:01.111111111 gn gn gn {"gn":"gn"} [1169,1170] {"c1":1169,"c2":"g"} 2109-02-08
+1170 42 170 51.0 170.3 go 2110-03-09 01:01:01.111111111 go go go {"go":"go"} [1170,1171] {"c1":1170,"c2":"g"} 2110-03-09
+1171 43 171 51.3 171.3 gp 2111-04-10 01:01:01.111111111 gp gp gp {"gp":"gp"} [1171,1172] {"c1":1171,"c2":"g"} 2111-04-10
+1172 44 172 51.6 172.3 gq 2112-05-11 01:01:01.111111111 gq gq gq {"gq":"gq"} [1172,1173] {"c1":1172,"c2":"g"} 2112-05-11
+1173 45 173 51.9 173.3 gr 2113-06-12 01:01:01.111111111 gr gr gr {"gr":"gr"} [1173,1174] {"c1":1173,"c2":"g"} 2113-06-12
+1174 46 174 52.2 174.3 gs 2114-07-13 01:01:01.111111111 gs gs gs {"gs":"gs"} [1174,1175] {"c1":1174,"c2":"g"} 2114-07-13
+1175 47 175 52.5 175.3 gt 2115-08-14 01:01:01.111111111 gt gt gt {"gt":"gt"} [1175,1176] {"c1":1175,"c2":"g"} 2115-08-14
+1176 48 176 52.8 176.3 gu 2116-09-15 01:01:01.111111111 gu gu gu {"gu":"gu"} [1176,1177] {"c1":1176,"c2":"g"} 2116-09-15
+1177 49 177 53.1 177.3 gv 2117-10-16 01:01:01.111111111 gv gv gv {"gv":"gv"} [1177,1178] {"c1":1177,"c2":"g"} 2117-10-16
+1178 50 178 53.4 178.3 gw 2118-11-17 01:01:01.111111111 gw gw gw {"gw":"gw"} [1178,1179] {"c1":1178,"c2":"g"} 2118-11-17
+1179 51 179 53.7 179.3 gx 2119-12-18 01:01:01.111111111 gx gx gx {"gx":"gx"} [1179,1180] {"c1":1179,"c2":"g"} 2119-12-18
+1180 52 180 54.0 180.3 gy 2120-01-19 01:01:01.111111111 gy gy gy {"gy":"gy"} [1180,1181] {"c1":1180,"c2":"g"} 2120-01-19
+1181 53 181 54.3 181.3 gz 2121-02-20 01:01:01.111111111 gz gz gz {"gz":"gz"} [1181,1182] {"c1":1181,"c2":"h"} 2121-02-20
+1182 54 182 54.6 182.3 ha 2122-03-21 01:01:01.111111111 ha ha ha {"ha":"ha"} [1182,1183] {"c1":1182,"c2":"h"} 2122-03-21
+1183 55 183 54.9 183.3 hb 2123-04-22 01:01:01.111111111 hb hb hb {"hb":"hb"} [1183,1184] {"c1":1183,"c2":"h"} 2123-04-22
+1184 56 184 55.2 184.3 hc 2124-05-23 01:01:01.111111111 hc hc hc {"hc":"hc"} [1184,1185] {"c1":1184,"c2":"h"} 2124-05-23
+1185 57 185 55.5 185.3 hd 2125-06-24 01:01:01.111111111 hd hd hd {"hd":"hd"} [1185,1186] {"c1":1185,"c2":"h"} 2125-06-24
+1186 58 186 55.8 186.3 he 2126-07-25 01:01:01.111111111 he he he {"he":"he"} [1186,1187] {"c1":1186,"c2":"h"} 2126-07-25
+1187 59 187 56.1 187.3 hf 2127-08-26 01:01:01.111111111 hf hf hf {"hf":"hf"} [1187,1188] {"c1":1187,"c2":"h"} 2127-08-26
+1188 60 188 56.4 188.3 hg 2128-09-27 01:01:01.111111111 hg hg hg {"hg":"hg"} [1188,1189] {"c1":1188,"c2":"h"} 2128-09-27
+1189 61 189 56.7 189.3 hh 2129-10-01 01:01:01.111111111 hh hh hh {"hh":"hh"} [1189,1190] {"c1":1189,"c2":"h"} 2129-10-01
+1190 62 190 57.0 190.3 hi 2130-11-02 01:01:01.111111111 hi hi hi {"hi":"hi"} [1190,1191] {"c1":1190,"c2":"h"} 2130-11-02
+1191 63 191 57.3 191.3 hj 2131-12-03 01:01:01.111111111 hj hj hj {"hj":"hj"} [1191,1192] {"c1":1191,"c2":"h"} 2131-12-03
+1192 64 192 57.6 192.3 hk 2132-01-04 01:01:01.111111111 hk hk hk {"hk":"hk"} [1192,1193] {"c1":1192,"c2":"h"} 2132-01-04
+1193 65 193 57.9 193.3 hl 2133-02-05 01:01:01.111111111 hl hl hl {"hl":"hl"} [1193,1194] {"c1":1193,"c2":"h"} 2133-02-05
+1194 66 194 58.2 194.3 hm 2134-03-06 01:01:01.111111111 hm hm hm {"hm":"hm"} [1194,1195] {"c1":1194,"c2":"h"} 2134-03-06
+1195 67 195 58.5 195.3 hn 2135-04-07 01:01:01.111111111 hn hn hn {"hn":"hn"} [1195,1196] {"c1":1195,"c2":"h"} 2135-04-07
+1196 68 196 58.8 196.3 ho 2136-05-08 01:01:01.111111111 ho ho ho {"ho":"ho"} [1196,1197] {"c1":1196,"c2":"h"} 2136-05-08
+1197 69 197 59.1 197.3 hp 2137-06-09 01:01:01.111111111 hp hp hp {"hp":"hp"} [1197,1198] {"c1":1197,"c2":"h"} 2137-06-09
+1198 70 198 59.4 198.3 hq 2138-07-10 01:01:01.111111111 hq hq hq {"hq":"hq"} [1198,1199] {"c1":1198,"c2":"h"} 2138-07-10
+1199 71 199 59.7 199.3 hr 2139-08-11 01:01:01.111111111 hr hr hr {"hr":"hr"} [1199,1200] {"c1":1199,"c2":"h"} 2139-08-11
+1200 72 200 60.0 200.3 hs 2140-09-12 01:01:01.111111111 hs hs hs {"hs":"hs"} [1200,1201] {"c1":1200,"c2":"h"} 2140-09-12
+1201 73 201 60.3 201.3 ht 2141-10-13 01:01:01.111111111 ht ht ht {"ht":"ht"} [1201,1202] {"c1":1201,"c2":"h"} 2141-10-13
+1202 74 202 60.6 202.3 hu 2142-11-14 01:01:01.111111111 hu hu hu {"hu":"hu"} [1202,1203] {"c1":1202,"c2":"h"} 2142-11-14
+1203 75 203 60.9 203.3 hv 2143-12-15 01:01:01.111111111 hv hv hv {"hv":"hv"} [1203,1204] {"c1":1203,"c2":"h"} 2143-12-15
+1204 76 204 61.2 204.3 hw 2144-01-16 01:01:01.111111111 hw hw hw {"hw":"hw"} [1204,1205] {"c1":1204,"c2":"h"} 2144-01-16
+1205 77 205 61.5 205.3 hx 2145-02-17 01:01:01.111111111 hx hx hx {"hx":"hx"} [1205,1206] {"c1":1205,"c2":"h"} 2145-02-17
+1206 78 206 61.8 206.3 hy 2146-03-18 01:01:01.111111111 hy hy hy {"hy":"hy"} [1206,1207] {"c1":1206,"c2":"h"} 2146-03-18
+1207 79 207 62.1 207.3 hz 2147-04-19 01:01:01.111111111 hz hz hz {"hz":"hz"} [1207,1208] {"c1":1207,"c2":"i"} 2147-04-19
+1208 80 208 62.4 208.3 ia 2148-05-20 01:01:01.111111111 ia ia ia {"ia":"ia"} [1208,1209] {"c1":1208,"c2":"i"} 2148-05-20
+1209 81 209 62.7 209.3 ib 2149-06-21 01:01:01.111111111 ib ib ib {"ib":"ib"} [1209,1210] {"c1":1209,"c2":"i"} 2149-06-21
+1210 82 210 63.0 210.3 ic 2150-07-22 01:01:01.111111111 ic ic ic {"ic":"ic"} [1210,1211] {"c1":1210,"c2":"i"} 2150-07-22
+1211 83 211 63.3 211.3 id 2151-08-23 01:01:01.111111111 id id id {"id":"id"} [1211,1212] {"c1":1211,"c2":"i"} 2151-08-23
+1212 84 212 63.6 212.3 ie 2152-09-24 01:01:01.111111111 ie ie ie {"ie":"ie"} [1212,1213] {"c1":1212,"c2":"i"} 2152-09-24
+1213 85 213 63.9 213.3 if 2153-10-25 01:01:01.111111111 if if if {"if":"if"} [1213,1214] {"c1":1213,"c2":"i"} 2153-10-25
+1214 86 214 64.2 214.3 ig 2154-11-26 01:01:01.111111111 ig ig ig {"ig":"ig"} [1214,1215] {"c1":1214,"c2":"i"} 2154-11-26
+1215 87 215 64.5 215.3 ih 2155-12-27 01:01:01.111111111 ih ih ih {"ih":"ih"} [1215,1216] {"c1":1215,"c2":"i"} 2155-12-27
+1216 88 216 64.8 216.3 ii 2156-01-01 01:01:01.111111111 ii ii ii {"ii":"ii"} [1216,1217] {"c1":1216,"c2":"i"} 2156-01-01
+1217 89 217 65.1 217.3 ij 2157-02-02 01:01:01.111111111 ij ij ij {"ij":"ij"} [1217,1218] {"c1":1217,"c2":"i"} 2157-02-02
+1218 90 218 65.4 218.3 ik 2158-03-03 01:01:01.111111111 ik ik ik {"ik":"ik"} [1218,1219] {"c1":1218,"c2":"i"} 2158-03-03
+1219 91 219 65.7 219.3 il 2159-04-04 01:01:01.111111111 il il il {"il":"il"} [1219,1220] {"c1":1219,"c2":"i"} 2159-04-04
+1220 92 220 66.0 220.3 im 2160-05-05 01:01:01.111111111 im im im {"im":"im"} [1220,1221] {"c1":1220,"c2":"i"} 2160-05-05
+1221 93 221 66.3 221.3 in 2161-06-06 01:01:01.111111111 in in in {"in":"in"} [1221,1222] {"c1":1221,"c2":"i"} 2161-06-06
+1222 94 222 66.6 222.3 io 2162-07-07 01:01:01.111111111 io io io {"io":"io"} [1222,1223] {"c1":1222,"c2":"i"} 2162-07-07
+1223 95 223 66.9 223.3 ip 2163-08-08 01:01:01.111111111 ip ip ip {"ip":"ip"} [1223,1224] {"c1":1223,"c2":"i"} 2163-08-08
+1224 96 224 67.2 224.3 iq 2164-09-09 01:01:01.111111111 iq iq iq {"iq":"iq"} [1224,1225] {"c1":1224,"c2":"i"} 2164-09-09
+1225 97 225 67.5 225.3 ir 2165-10-10 01:01:01.111111111 ir ir ir {"ir":"ir"} [1225,1226] {"c1":1225,"c2":"i"} 2165-10-10
+1226 98 226 67.8 226.3 is 2166-11-11 01:01:01.111111111 is is is {"is":"is"} [1226,1227] {"c1":1226,"c2":"i"} 2166-11-11
+1227 99 227 68.1 227.3 it 2167-12-12 01:01:01.111111111 it it it {"it":"it"} [1227,1228] {"c1":1227,"c2":"i"} 2167-12-12
+1228 100 228 68.4 228.3 iu 2168-01-13 01:01:01.111111111 iu iu iu {"iu":"iu"} [1228,1229] {"c1":1228,"c2":"i"} 2168-01-13
+1229 101 229 68.7 229.3 iv 2169-02-14 01:01:01.111111111 iv iv iv {"iv":"iv"} [1229,1230] {"c1":1229,"c2":"i"} 2169-02-14
+1230 102 230 69.0 230.3 iw 2170-03-15 01:01:01.111111111 iw iw iw {"iw":"iw"} [1230,1231] {"c1":1230,"c2":"i"} 2170-03-15
+1231 103 231 69.3 231.3 ix 2171-04-16 01:01:01.111111111 ix ix ix {"ix":"ix"} [1231,1232] {"c1":1231,"c2":"i"} 2171-04-16
+1232 104 232 69.6 232.3 iy 2172-05-17 01:01:01.111111111 iy iy iy {"iy":"iy"} [1232,1233] {"c1":1232,"c2":"i"} 2172-05-17
+1233 105 233 69.9 233.3 iz 2173-06-18 01:01:01.111111111 iz iz iz {"iz":"iz"} [1233,1234] {"c1":1233,"c2":"j"} 2173-06-18
+1234 106 234 70.2 234.3 ja 2174-07-19 01:01:01.111111111 ja ja ja {"ja":"ja"} [1234,1235] {"c1":1234,"c2":"j"} 2174-07-19
+1235 107 235 70.5 235.3 jb 2175-08-20 01:01:01.111111111 jb jb jb {"jb":"jb"} [1235,1236] {"c1":1235,"c2":"j"} 2175-08-20
+1236 108 236 70.8 236.3 jc 2176-09-21 01:01:01.111111111 jc jc jc {"jc":"jc"} [1236,1237] {"c1":1236,"c2":"j"} 2176-09-21
+1237 109 237 71.1 237.3 jd 2177-10-22 01:01:01.111111111 jd jd jd {"jd":"jd"} [1237,1238] {"c1":1237,"c2":"j"} 2177-10-22
+1238 110 238 71.4 238.3 je 2178-11-23 01:01:01.111111111 je je je {"je":"je"} [1238,1239] {"c1":1238,"c2":"j"} 2178-11-23
+1239 111 239 71.7 239.3 jf 2179-12-24 01:01:01.111111111 jf jf jf {"jf":"jf"} [1239,1240] {"c1":1239,"c2":"j"} 2179-12-24
+1240 112 240 72.0 240.3 jg 2180-01-25 01:01:01.111111111 jg jg jg {"jg":"jg"} [1240,1241] {"c1":1240,"c2":"j"} 2180-01-25
+1241 113 241 72.3 241.3 jh 2181-02-26 01:01:01.111111111 jh jh jh {"jh":"jh"} [1241,1242] {"c1":1241,"c2":"j"} 2181-02-26
+1242 114 242 72.6 242.3 ji 2182-03-27 01:01:01.111111111 ji ji ji {"ji":"ji"} [1242,1243] {"c1":1242,"c2":"j"} 2182-03-27
+1243 115 243 72.9 243.3 jj 2183-04-01 01:01:01.111111111 jj jj jj {"jj":"jj"} [1243,1244] {"c1":1243,"c2":"j"} 2183-04-01
+1244 116 244 73.2 244.3 jk 2184-05-02 01:01:01.111111111 jk jk jk {"jk":"jk"} [1244,1245] {"c1":1244,"c2":"j"} 2184-05-02
+1245 117 245 73.5 245.3 jl 2185-06-03 01:01:01.111111111 jl jl jl {"jl":"jl"} [1245,1246] {"c1":1245,"c2":"j"} 2185-06-03
+1246 118 246 73.8 246.3 jm 2186-07-04 01:01:01.111111111 jm jm jm {"jm":"jm"} [1246,1247] {"c1":1246,"c2":"j"} 2186-07-04
+1247 119 247 74.1 247.3 jn 2187-08-05 01:01:01.111111111 jn jn jn {"jn":"jn"} [1247,1248] {"c1":1247,"c2":"j"} 2187-08-05
+1248 120 248 74.4 248.3 jo 2188-09-06 01:01:01.111111111 jo jo jo {"jo":"jo"} [1248,1249] {"c1":1248,"c2":"j"} 2188-09-06
+1249 121 249 74.7 249.3 jp 2189-10-07 01:01:01.111111111 jp jp jp {"jp":"jp"} [1249,1250] {"c1":1249,"c2":"j"} 2189-10-07
+1250 122 250 75.0 250.3 jq 2190-11-08 01:01:01.111111111 jq jq jq {"jq":"jq"} [1250,1251] {"c1":1250,"c2":"j"} 2190-11-08
+1251 123 251 75.3 251.3 jr 2191-12-09 01:01:01.111111111 jr jr jr {"jr":"jr"} [1251,1252] {"c1":1251,"c2":"j"} 2191-12-09
+1252 124 252 75.6 252.3 js 2192-01-10 01:01:01.111111111 js js js {"js":"js"} [1252,1253] {"c1":1252,"c2":"j"} 2192-01-10
+1253 125 253 75.9 253.3 jt 2193-02-11 01:01:01.111111111 jt jt jt {"jt":"jt"} [1253,1254] {"c1":1253,"c2":"j"} 2193-02-11
+1254 126 254 76.2 254.3 ju 2194-03-12 01:01:01.111111111 ju ju ju {"ju":"ju"} [1254,1255] {"c1":1254,"c2":"j"} 2194-03-12
+1255 127 255 76.5 255.3 jv 2195-04-13 01:01:01.111111111 jv jv jv {"jv":"jv"} [1255,1256] {"c1":1255,"c2":"j"} 2195-04-13
+1256 -128 256 76.8 256.3 jw 2196-05-14 01:01:01.111111111 jw jw jw {"jw":"jw"} [1256,1257] {"c1":1256,"c2":"j"} 2196-05-14
+1257 -127 257 77.1 257.3 jx 2197-06-15 01:01:01.111111111 jx jx jx {"jx":"jx"} [1257,1258] {"c1":1257,"c2":"j"} 2197-06-15
+1258 -126 258 77.4 258.3 jy 2198-07-16 01:01:01.111111111 jy jy jy {"jy":"jy"} [1258,1259] {"c1":1258,"c2":"j"} 2198-07-16
+1259 -125 259 77.7 259.3 jz 2199-08-17 01:01:01.111111111 jz jz jz {"jz":"jz"} [1259,1260] {"c1":1259,"c2":"k"} 2199-08-17
+1260 -124 260 78.0 260.3 ka 2200-09-18 01:01:01.111111111 ka ka ka {"ka":"ka"} [1260,1261] {"c1":1260,"c2":"k"} 2200-09-18
+1261 -123 261 78.3 261.3 kb 2201-10-19 01:01:01.111111111 kb kb kb {"kb":"kb"} [1261,1262] {"c1":1261,"c2":"k"} 2201-10-19
+1262 -122 262 78.6 262.3 kc 2202-11-20 01:01:01.111111111 kc kc kc {"kc":"kc"} [1262,1263] {"c1":1262,"c2":"k"} 2202-11-20
+1263 -121 263 78.9 263.3 kd 2203-12-21 01:01:01.111111111 kd kd kd {"kd":"kd"} [1263,1264] {"c1":1263,"c2":"k"} 2203-12-21
+1264 -120 264 79.2 264.3 ke 2204-01-22 01:01:01.111111111 ke ke ke {"ke":"ke"} [1264,1265] {"c1":1264,"c2":"k"} 2204-01-22
+1265 -119 265 79.5 265.3 kf 2205-02-23 01:01:01.111111111 kf kf kf {"kf":"kf"} [1265,1266] {"c1":1265,"c2":"k"} 2205-02-23
+1266 -118 266 79.8 266.3 kg 2206-03-24 01:01:01.111111111 kg kg kg {"kg":"kg"} [1266,1267] {"c1":1266,"c2":"k"} 2206-03-24
+1267 -117 267 80.1 267.3 kh 2207-04-25 01:01:01.111111111 kh kh kh {"kh":"kh"} [1267,1268] {"c1":1267,"c2":"k"} 2207-04-25
+1268 -116 268 80.4 268.3 ki 2208-05-26 01:01:01.111111111 ki ki ki {"ki":"ki"} [1268,1269] {"c1":1268,"c2":"k"} 2208-05-26
+1269 -115 269 80.7 269.3 kj 2209-06-27 01:01:01.111111111 kj kj kj {"kj":"kj"} [1269,1270] {"c1":1269,"c2":"k"} 2209-06-27
+1270 -114 270 81.0 270.3 kk 2210-07-01 01:01:01.111111111 kk kk kk {"kk":"kk"} [1270,1271] {"c1":1270,"c2":"k"} 2210-07-01
+1271 -113 271 81.3 271.3 kl 2211-08-02 01:01:01.111111111 kl kl kl {"kl":"kl"} [1271,1272] {"c1":1271,"c2":"k"} 2211-08-02
+1272 -112 272 81.6 272.3 km 2212-09-03 01:01:01.111111111 km km km {"km":"km"} [1272,1273] {"c1":1272,"c2":"k"} 2212-09-03
+1273 -111 273 81.9 273.3 kn 2213-10-04 01:01:01.111111111 kn kn kn {"kn":"kn"} [1273,1274] {"c1":1273,"c2":"k"} 2213-10-04
+1274 -110 274 82.2 274.3 ko 2214-11-05 01:01:01.111111111 ko ko ko {"ko":"ko"} [1274,1275] {"c1":1274,"c2":"k"} 2214-11-05
+1275 -109 275 82.5 275.3 kp 2215-12-06 01:01:01.111111111 kp kp kp {"kp":"kp"} [1275,1276] {"c1":1275,"c2":"k"} 2215-12-06
+1276 -108 276 82.8 276.3 kq 2216-01-07 01:01:01.111111111 kq kq kq {"kq":"kq"} [1276,1277] {"c1":1276,"c2":"k"} 2216-01-07
+1277 -107 277 83.1 277.3 kr 2217-02-08 01:01:01.111111111 kr kr kr {"kr":"kr"} [1277,1278] {"c1":1277,"c2":"k"} 2217-02-08
+1278 -106 278 83.4 278.3 ks 2218-03-09 01:01:01.111111111 ks ks ks {"ks":"ks"} [1278,1279] {"c1":1278,"c2":"k"} 2218-03-09
+1279 -105 279 83.7 279.3 kt 2219-04-10 01:01:01.111111111 kt kt kt {"kt":"kt"} [1279,1280] {"c1":1279,"c2":"k"} 2219-04-10
+1280 -104 280 84.0 280.3 ku 2220-05-11 01:01:01.111111111 ku ku ku {"ku":"ku"} [1280,1281] {"c1":1280,"c2":"k"} 2220-05-11
+1281 -103 281 84.3 281.3 kv 2221-06-12 01:01:01.111111111 kv kv kv {"kv":"kv"} [1281,1282] {"c1":1281,"c2":"k"} 2221-06-12
+1282 -102 282 84.6 282.3 kw 2222-07-13 01:01:01.111111111 kw kw kw {"kw":"kw"} [1282,1283] {"c1":1282,"c2":"k"} 2222-07-13
+1283 -101 283 84.9 283.3 kx 2223-08-14 01:01:01.111111111 kx kx kx {"kx":"kx"} [1283,1284] {"c1":1283,"c2":"k"} 2223-08-14
+1284 -100 284 85.2 284.3 ky 2224-09-15 01:01:01.111111111 ky ky ky {"ky":"ky"} [1284,1285] {"c1":1284,"c2":"k"} 2224-09-15
+1285 -99 285 85.5 285.3 kz 2225-10-16 01:01:01.111111111 kz kz kz {"kz":"kz"} [1285,1286] {"c1":1285,"c2":"l"} 2225-10-16
+1286 -98 286 85.8 286.3 la 2226-11-17 01:01:01.111111111 la la la {"la":"la"} [1286,1287] {"c1":1286,"c2":"l"} 2226-11-17
+1287 -97 287 86.1 287.3 lb 2227-12-18 01:01:01.111111111 lb lb lb {"lb":"lb"} [1287,1288] {"c1":1287,"c2":"l"} 2227-12-18
+1288 -96 288 86.4 288.3 lc 2228-01-19 01:01:01.111111111 lc lc lc {"lc":"lc"} [1288,1289] {"c1":1288,"c2":"l"} 2228-01-19
+1289 -95 289 86.7 289.3 ld 2229-02-20 01:01:01.111111111 ld ld ld {"ld":"ld"} [1289,1290] {"c1":1289,"c2":"l"} 2229-02-20
+1290 -94 290 87.0 290.3 le 2230-03-21 01:01:01.111111111 le le le {"le":"le"} [1290,1291] {"c1":1290,"c2":"l"} 2230-03-21
+1291 -93 291 87.3 291.3 lf 2231-04-22 01:01:01.111111111 lf lf lf {"lf":"lf"} [1291,1292] {"c1":1291,"c2":"l"} 2231-04-22
+1292 -92 292 87.6 292.3 lg 2232-05-23 01:01:01.111111111 lg lg lg {"lg":"lg"} [1292,1293] {"c1":1292,"c2":"l"} 2232-05-23
+1293 -91 293 87.9 293.3 lh 2233-06-24 01:01:01.111111111 lh lh lh {"lh":"lh"} [1293,1294] {"c1":1293,"c2":"l"} 2233-06-24
+1294 -90 294 88.2 294.3 li 2234-07-25 01:01:01.111111111 li li li {"li":"li"} [1294,1295] {"c1":1294,"c2":"l"} 2234-07-25
+1295 -89 295 88.5 295.3 lj 2235-08-26 01:01:01.111111111 lj lj lj {"lj":"lj"} [1295,1296] {"c1":1295,"c2":"l"} 2235-08-26
+1296 -88 296 88.8 296.3 lk 2236-09-27 01:01:01.111111111 lk lk lk {"lk":"lk"} [1296,1297] {"c1":1296,"c2":"l"} 2236-09-27
+1297 -87 297 89.1 297.3 ll 2237-10-01 01:01:01.111111111 ll ll ll {"ll":"ll"} [1297,1298] {"c1":1297,"c2":"l"} 2237-10-01
+1298 -86 298 89.4 298.3 lm 2238-11-02 01:01:01.111111111 lm lm lm {"lm":"lm"} [1298,1299] {"c1":1298,"c2":"l"} 2238-11-02
+1299 -85 299 89.7 299.3 ln 2239-12-03 01:01:01.111111111 ln ln ln {"ln":"ln"} [1299,1300] {"c1":1299,"c2":"l"} 2239-12-03
+PREHOOK: query: INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types_staging
+PREHOOK: Output: default@parquet_types
+POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types_staging
+POSTHOOK: Output: default@parquet_types
+POSTHOOK: Lineage: parquet_types.cbinary EXPRESSION [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cbinary, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cchar, type:char(5), comment:null), ]
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ]
+POSTHOOK: Lineage: parquet_types.d SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:d, type:date, comment:null), ]
+POSTHOOK: Lineage: parquet_types.l1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:l1, type:array<int>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.m1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:m1, type:map<string,varchar(3)>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.st1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:st1, type:struct<c1:int,c2:char(1)>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ]
+PREHOOK: query: -- test types in group by
+
+EXPLAIN SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ ROUND(AVG(cfloat), 5),
+ ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: -- test types in group by
+
+EXPLAIN SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ ROUND(AVG(cfloat), 5),
+ ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double)
+ outputColumnNames: ctinyint, cint, csmallint, cstring1, cfloat, cdouble
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
+ keys: ctinyint (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round(_col4, 5) (type: double), round(_col5, 5) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int), VALUE._col1 (type: smallint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ ROUND(AVG(cfloat), 5),
+ ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ ROUND(AVG(cfloat), 5),
+ ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+-128 1256 0 2 38.4 128.0
+-127 1257 1 2 38.7 128.0
+-126 1258 2 2 39.0 128.0
+-125 1259 3 2 39.3 128.0
+-124 1260 4 2 39.6 128.0
+-123 1261 5 2 39.9 128.0
+-122 1262 6 2 40.2 128.0
+-121 1263 7 2 40.5 128.0
+-120 1264 8 2 40.8 128.0
+-119 1265 9 2 41.1 128.0
+-118 1266 10 2 41.4 128.0
+-117 1267 11 2 41.7 128.0
+-116 1268 12 2 42.0 128.0
+-115 1269 13 2 42.3 128.0
+-114 1270 14 2 42.6 128.0
+-113 1271 15 2 42.9 128.0
+-112 1272 16 2 43.2 128.0
+-111 1273 17 2 43.5 128.0
+-110 1274 18 2 43.8 128.0
+-109 1275 19 2 44.1 128.0
+-108 1276 20 2 44.4 128.0
+-107 1277 21 2 44.7 128.0
+-106 1278 22 2 45.0 128.0
+-105 1279 23 2 45.3 128.0
+-104 1280 24 2 45.6 128.0
+-103 1281 25 2 45.9 128.0
+-102 1282 26 2 46.2 128.0
+-101 1283 27 2 46.5 128.0
+-100 1284 28 2 46.8 128.0
+-99 1285 29 2 47.1 128.0
+-98 1286 30 2 47.4 128.0
+-97 1287 31 2 47.7 128.0
+-96 1288 32 2 48.0 128.0
+-95 1289 33 2 48.3 128.0
+-94 1290 34 2 48.6 128.0
+-93 1291 35 2 48.9 128.0
+-92 1292 36 2 49.2 128.0
+-91 1293 37 2 49.5 128.0
+-90 1294 38 2 49.8 128.0
+-89 1295 39 2 50.1 128.0
+-88 1296 40 2 50.4 128.0
+-87 1297 41 2 50.7 128.0
+-86 1298 42 2 51.0 128.0
+-85 1299 43 2 51.3 128.0
+-84 1044 44 1 13.2 0.0
+-83 1045 45 1 13.5 0.0
+-82 1046 46 1 13.8 0.0
+-81 1047 47 1 14.1 0.0
+-80 1048 48 1 14.4 0.0
+-79 1049 49 1 14.7 0.0
+-78 1050 50 1 15.0 0.0
+-77 1051 51 1 15.3 0.0
+-76 1052 52 1 15.6 0.0
+-75 1053 53 1 15.9 0.0
+-74 1054 54 1 16.2 0.0
+-73 1055 55 1 16.5 0.0
+-72 1056 56 1 16.8 0.0
+-71 1057 57 1 17.1 0.0
+-70 1058 58 1 17.4 0.0
+-69 1059 59 1 17.7 0.0
+-68 1060 60 1 18.0 0.0
+-67 1061 61 1 18.3 0.0
+-66 1062 62 1 18.6 0.0
+-65 1063 63 1 18.9 0.0
+-64 1064 64 1 19.2 0.0
+-63 1065 65 1 19.5 0.0
+-62 1066 66 1 19.8 0.0
+-61 1067 67 1 20.1 0.0
+-60 1068 68 1 20.4 0.0
+-59 1069 69 1 20.7 0.0
+-58 1070 70 1 21.0 0.0
+-57 1071 71 1 21.3 0.0
+-56 1072 72 1 21.6 0.0
+-55 1073 73 1 21.9 0.0
+-54 1074 74 1 22.2 0.0
+-53 1075 75 1 22.5 0.0
+-52 1076 76 1 22.8 0.0
+-51 1077 77 1 23.1 0.0
+-50 1078 78 1 23.4 0.0
+-49 1079 79 1 23.7 0.0
+-48 1080 80 1 24.0 0.0
+-47 1081 81 1 24.3 0.0
+-46 1082 82 1 24.6 0.0
+-45 1083 83 1 24.9 0.0
+-44 1084 84 1 25.2 0.0
+-43 1085 85 1 25.5 0.0
+-42 1086 86 1 25.8 0.0
+-41 1087 87 1 26.1 0.0
+-40 1088 88 1 26.4 0.0
+-39 1089 89 1 26.7 0.0
+-38 1090 90 1 27.0 0.0
+-37 1091 91 1 27.3 0.0
+-36 1092 92 1 27.6 0.0
+-35 1093 93 1 27.9 0.0
+-34 1094 94 1 28.2 0.0
+-33 1095 95 1 28.5 0.0
+-32 1096 96 1 28.8 0.0
+-31 1097 97 1 29.1 0.0
+-30 1098 98 1 29.4 0.0
+-29 1099 99 1 29.7 0.0
+-28 1100 100 1 30.0 0.0
+-27 1101 101 1 30.3 0.0
+-26 1102 102 1 30.6 0.0
+-25 1103 103 1 30.9 0.0
+-24 1104 104 1 31.2 0.0
+-23 1105 105 1 31.5 0.0
+-22 1106 106 1 31.8 0.0
+-21 1107 107 1 32.1 0.0
+-20 1108 108 1 32.4 0.0
+-19 1109 109 1 32.7 0.0
+-18 1110 110 1 33.0 0.0
+-17 1111 111 1 33.3 0.0
+-16 1112 112 1 33.6 0.0
+-15 1113 113 1 33.9 0.0
+-14 1114 114 1 34.2 0.0
+-13 1115 115 1 34.5 0.0
+-12 1116 116 1 34.8 0.0
+-11 1117 117 1 35.1 0.0
+-10 1118 118 1 35.4 0.0
+-9 1119 119 1 35.7 0.0
+-8 1120 120 1 36.0 0.0
+-7 1121 121 1 36.3 0.0
+-6 1122 122 1 36.6 0.0
+-5 1123 123 1 36.9 0.0
+-4 1124 124 1 37.2 0.0
+-3 1125 125 1 37.5 0.0
+-2 1126 126 1 37.8 0.0
+-1 1127 127 1 38.1 0.0
+0 1128 128 1 38.4 0.0
+1 1129 129 1 38.7 0.0
+2 1130 130 1 39.0 0.0
+3 1131 131 1 39.3 0.0
+4 1132 132 1 39.6 0.0
+5 1133 133 1 39.9 0.0
+6 1134 134 1 40.2 0.0
+7 1135 135 1 40.5 0.0
+8 1136 136 1 40.8 0.0
+9 1137 137 1 41.1 0.0
+10 1138 138 1 41.4 0.0
+11 1139 139 1 41.7 0.0
+12 1140 140 1 42.0 0.0
+13 1141 141 1 42.3 0.0
+14 1142 142 1 42.6 0.0
+15 1143 143 1 42.9 0.0
+16 1144 144 1 43.2 0.0
+17 1145 145 1 43.5 0.0
+18 1146 146 1 43.8 0.0
+19 1147 147 1 44.1 0.0
+20 1148 148 1 44.4 0.0
+21 1149 149 1 44.7 0.0
+22 1150 150 1 45.0 0.0
+23 1151 151 1 45.3 0.0
+24 1152 152 1 45.6 0.0
+25 1153 153 1 45.9 0.0
+26 1154 154 1 46.2 0.0
+27 1155 155 1 46.5 0.0
+28 1156 156 1 46.8 0.0
+29 1157 157 1 47.1 0.0
+30 1158 158 1 47.4 0.0
+31 1159 159 1 47.7 0.0
+32 1160 160 1 48.0 0.0
+33 1161 161 1 48.3 0.0
+34 1162 162 1 48.6 0.0
+35 1163 163 1 48.9 0.0
+36 1164 164 1 49.2 0.0
+37 1165 165 1 49.5 0.0
+38 1166 166 1 49.8 0.0
+39 1167 167 1 50.1 0.0
+40 1168 168 1 50.4 0.0
+41 1169 169 1 50.7 0.0
+42 1170 170 1 51.0 0.0
+43 1171 171 1 51.3 0.0
+44 1172 172 1 51.6 0.0
+45 1173 173 1 51.9 0.0
+46 1174 174 1 52.2 0.0
+47 1175 175 1 52.5 0.0
+48 1176 176 1 52.8 0.0
+49 1177 177 1 53.1 0.0
+50 1178 178 1 53.4 0.0
+51 1179 179 1 53.7 0.0
+52 1180 180 1 54.0 0.0
+53 1181 181 1 54.3 0.0
+54 1182 182 1 54.6 0.0
+55 1183 183 1 54.9 0.0
+56 1184 184 1 55.2 0.0
+57 1185 185 1 55.5 0.0
+58 1186 186 1 55.8 0.0
+59 1187 187 1 56.1 0.0
+60 1188 188 1 56.4 0.0
+61 1189 189 1 56.7 0.0
+62 1190 190 1 57.0 0.0
+63 1191 191 1 57.3 0.0
+64 1192 192 1 57.6 0.0
+65 1193 193 1 57.9 0.0
+66 1194 194 1 58.2 0.0
+67 1195 195 1 58.5 0.0
+68 1196 196 1 58.8 0.0
+69 1197 197 1 59.1 0.0
+70 1198 198 1 59.4 0.0
+71 1199 199 1 59.7 0.0
+72 1200 200 1 60.0 0.0
+73 1201 201 1 60.3 0.0
+74 1202 202 1 60.6 0.0
+75 1203 203 1 60.9 0.0
+76 1204 204 1 61.2 0.0
+77 1205 205 1 61.5 0.0
+78 1206 206 1 61.8 0.0
+79 1207 207 1 62.1 0.0
+80 1208 208 1 62.4 0.0
+81 1209 209 1 62.7 0.0
+82 1210 210 1 63.0 0.0
+83 1211 211 1 63.3 0.0
+84 1212 212 1 63.6 0.0
+85 1213 213 1 63.9 0.0
+86 1214 214 1 64.2 0.0
+87 1215 215 1 64.5 0.0
+88 1216 216 1 64.8 0.0
+89 1217 217 1 65.1 0.0
+90 1218 218 1 65.4 0.0
+91 1219 219 1 65.7 0.0
+92 1220 220 1 66.0 0.0
+93 1221 221 1 66.3 0.0
+94 1222 222 1 66.6 0.0
+95 1223 223 1 66.9 0.0
+96 1224 224 1 67.2 0.0
+97 1225 225 1 67.5 0.0
+98 1226 226 1 67.8 0.0
+99 1227 227 1 68.1 0.0
+100 1228 228 1 68.4 0.0
+101 1229 229 1 68.7 0.0
+102 1230 230 1 69.0 0.0
+103 1231 231 1 69.3 0.0
+104 1232 232 1 69.6 0.0
+105 1233 233 1 69.9 0.0
+106 1234 234 1 70.2 0.0
+107 1235 235 1 70.5 0.0
+108 1236 236 1 70.8 0.0
+109 1237 237 1 71.1 0.0
+110 1238 238 1 71.4 0.0
+111 1239 239 1 71.7 0.0
+112 1240 240 1 72.0 0.0
+113 1241 241 1 72.3 0.0
+114 1242 242 1 72.6 0.0
+115 1243 243 1 72.9 0.0
+116 1244 244 1 73.2 0.0
+117 1245 245 1 73.5 0.0
+118 1246 246 1 73.8 0.0
+119 1247 247 1 74.1 0.0
+120 1248 248 1 74.4 0.0
+121 1249 249 1 74.7 0.0
+122 1250 250 1 75.0 0.0
+123 1251 251 1 75.3 0.0
+124 1252 252 1 75.6 0.0
+125 1253 253 1 75.9 0.0
+126 1254 254 1 76.2 0.0
+127 1255 255 1 76.5 0.0
+PREHOOK: query: EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cfloat (type: float)
+ outputColumnNames: cfloat
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: cfloat (type: float)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: float)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: float)
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: float)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: float)
+ sort order: +
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+0.0 1
+0.3 1
+0.6 1
+0.9 1
+1.2 1
+1.5 1
+1.8 1
+2.1 1
+2.4 1
+2.7 1
+3.0 1
+3.3 1
+3.6 1
+3.9 1
+4.2 1
+4.5 1
+4.8 1
+5.1 1
+5.4 1
+5.7 1
+6.0 1
+6.3 1
+6.6 1
+6.9 1
+7.2 1
+7.5 1
+7.8 1
+8.1 1
+8.4 1
+8.7 1
+9.0 1
+9.3 1
+9.6 1
+9.9 1
+10.2 1
+10.5 1
+10.8 1
+11.1 1
+11.4 1
+11.7 1
+12.0 1
+12.3 1
+12.6 1
+12.9 1
+13.2 1
+13.5 1
+13.8 1
+14.1 1
+14.4 1
+14.7 1
+15.0 1
+15.3 1
+15.6 1
+15.9 1
+16.2 1
+16.5 1
+16.8 1
+17.1 1
+17.4 1
+17.7 1
+18.0 1
+18.3 1
+18.6 1
+18.9 1
+19.2 1
+19.5 1
+19.8 1
+20.1 1
+20.4 1
+20.7 1
+21.0 1
+21.3 1
+21.6 1
+21.9 1
+22.2 1
+22.5 1
+22.8 1
+23.1 1
+23.4 1
+23.7 1
+24.0 1
+24.3 1
+24.6 1
+24.9 1
+25.2 1
+25.5 1
+25.8 1
+26.1 1
+26.4 1
+26.7 1
+27.0 1
+27.3 1
+27.6 1
+27.9 1
+28.2 1
+28.5 1
+28.8 1
+29.1 1
+29.4 1
+29.7 1
+30.0 1
+30.3 1
+30.6 1
+30.9 1
+31.2 1
+31.5 1
+31.8 1
+32.1 1
+32.4 1
+32.7 1
+33.0 1
+33.3 1
+33.6 1
+33.9 1
+34.2 1
+34.5 1
+34.8 1
+35.1 1
+35.4 1
+35.7 1
+36.0 1
+36.3 1
+36.6 1
+36.9 1
+37.2 1
+37.5 1
+37.8 1
+38.1 1
+38.4 1
+38.7 1
+39.0 1
+39.3 1
+39.6 1
+39.9 1
+40.2 1
+40.5 1
+40.8 1
+41.1 1
+41.4 1
+41.7 1
+42.0 1
+42.3 1
+42.6 1
+42.9 1
+43.2 1
+43.5 1
+43.8 1
+44.1 1
+44.4 1
+44.7 1
+45.0 1
+45.3 1
+45.6 1
+45.9 1
+46.2 1
+46.5 1
+46.8 1
+47.1 1
+47.4 1
+47.7 1
+48.0 1
+48.3 1
+48.6 1
+48.9 1
+49.2 1
+49.5 1
+49.8 1
+50.1 1
+50.4 1
+50.7 1
+51.0 1
+51.3 1
+51.6 1
+51.9 1
+52.2 1
+52.5 1
+52.8 1
+53.1 1
+53.4 1
+53.7 1
+54.0 1
+54.3 1
+54.6 1
+54.9 1
+55.2 1
+55.5 1
+55.8 1
+56.1 1
+56.4 1
+56.7 1
+57.0 1
+57.3 1
+57.6 1
+57.9 1
+58.2 1
+58.5 1
+58.8 1
+59.1 1
+59.4 1
+59.7 1
+60.0 1
+60.3 1
+60.6 1
+60.9 1
+61.2 1
+61.5 1
+61.8 1
+62.1 1
+62.4 1
+62.7 1
+63.0 1
+63.3 1
+63.6 1
+63.9 1
+64.2 1
+64.5 1
+64.8 1
+65.1 1
+65.4 1
+65.7 1
+66.0 1
+66.3 1
+66.6 1
+66.9 1
+67.2 1
+67.5 1
+67.8 1
+68.1 1
+68.4 1
+68.7 1
+69.0 1
+69.3 1
+69.6 1
+69.9 1
+70.2 1
+70.5 1
+70.8 1
+71.1 1
+71.4 1
+71.7 1
+72.0 1
+72.3 1
+72.6 1
+72.9 1
+73.2 1
+73.5 1
+73.8 1
+74.1 1
+74.4 1
+74.7 1
+75.0 1
+75.3 1
+75.6 1
+75.9 1
+76.2 1
+76.5 1
+76.8 1
+77.1 1
+77.4 1
+77.7 1
+78.0 1
+78.3 1
+78.6 1
+78.9 1
+79.2 1
+79.5 1
+79.8 1
+80.1 1
+80.4 1
+80.7 1
+81.0 1
+81.3 1
+81.6 1
+81.9 1
+82.2 1
+82.5 1
+82.8 1
+83.1 1
+83.4 1
+83.7 1
+84.0 1
+84.3 1
+84.6 1
+84.9 1
+85.2 1
+85.5 1
+85.8 1
+86.1 1
+86.4 1
+86.7 1
+87.0 1
+87.3 1
+87.6 1
+87.9 1
+88.2 1
+88.5 1
+88.8 1
+89.1 1
+89.4 1
+89.7 1
+PREHOOK: query: EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cchar (type: char(5))
+ outputColumnNames: cchar
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: cchar (type: char(5))
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(5))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: char(5))
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: char(5))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: char(5))
+ sort order: +
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: char(5)), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+ 1
+b 1
+ba 1
+bb 1
+bc 1
+bd 1
+be 1
+bf 1
+bg 1
+bh 1
+bi 1
+bj 1
+bk 1
+bl 1
+bm 1
+bn 1
+bo 1
+bp 1
+bq 1
+br 1
+bs 1
+bt 1
+bu 1
+bv 1
+bw 1
+bx 1
+by 1
+bz 1
+c 1
+ca 1
+cb 1
+cc 1
+cd 1
+ce 1
+cf 1
+cg 1
+ch 1
+ci 1
+cj 1
+ck 1
+cl 1
+cm 1
+cn 1
+co 1
+cp 1
+cq 1
+cr 1
+cs 1
+ct 1
+cu 1
+cv 1
+cw 1
+cx 1
+cy 1
+cz 1
+d 1
+da 1
+db 1
+dc 1
+dd 1
+de 1
+df 1
+dg 1
+dh 1
+di 1
+dj 1
+dk 1
+dl 1
+dm 1
+dn 1
+do 1
+dp 1
+dq 1
+dr 1
+ds 1
+dt 1
+du 1
+dv 1
+dw 1
+dx 1
+dy 1
+dz 1
+e 1
+ea 1
+eb 1
+ec 1
+ed 1
+ee 1
+ef 1
+eg 1
+eh 1
+ei 1
+ej 1
+ek 1
+el 1
+em 1
+en 1
+eo 1
+ep 1
+eq 1
+er 1
+es 1
+et 1
+eu 1
+ev 1
+ew 1
+ex 1
+ey 1
+ez 1
+f 1
+fa 1
+fb 1
+fc 1
+fd 1
+fe 1
+ff 1
+fg 1
+fh 1
+fi 1
+fj 1
+fk 1
+fl 1
+fm 1
+fn 1
+fo 1
+fp 1
+fq 1
+fr 1
+fs 1
+ft 1
+fu 1
+fv 1
+fw 1
+fx 1
+fy 1
+fz 1
+g 1
+ga 1
+gb 1
+gc 1
+gd 1
+ge 1
+gf 1
+gg 1
+gh 1
+gi 1
+gj 1
+gk 1
+gl 1
+gm 1
+gn 1
+go 1
+gp 1
+gq 1
+gr 1
+gs 1
+gt 1
+gu 1
+gv 1
+gw 1
+gx 1
+gy 1
+gz 1
+h 1
+ha 1
+hb 1
+hc 1
+hd 1
+he 1
+hf 1
+hg 1
+hh 1
+hi 1
+hj 1
+hk 1
+hl 1
+hm 1
+hn 1
+ho 1
+hp 1
+hq 1
+hr 1
+hs 1
+ht 1
+hu 1
+hv 1
+hw 1
+hx 1
+hy 1
+hz 1
+i 1
+ia 1
+ib 1
+ic 1
+id 1
+ie 1
+if 1
+ig 1
+ih 1
+ii 1
+ij 1
+ik 1
+il 1
+im 1
+in 1
+io 1
+ip 1
+iq 1
+ir 1
+is 1
+it 1
+iu 1
+iv 1
+iw 1
+ix 1
+iy 1
+iz 1
+j 1
+ja 1
+jb 1
+jc 1
+jd 1
+je 1
+jf 1
+jg 1
+jh 1
+ji 1
+jj 1
+jk 1
+jl 1
+jm 1
+jn 1
+jo 1
+jp 1
+jq 1
+jr 1
+js 1
+jt 1
+ju 1
+jv 1
+jw 1
+jx 1
+jy 1
+jz 1
+k 1
+ka 1
+kb 1
+kc 1
+kd 1
+ke 1
+kf 1
+kg 1
+kh 1
+ki 1
+kj 1
+kk 1
+kl 1
+km 1
+kn 1
+ko 1
+kp 1
+kq 1
+kr 1
+ks 1
+kt 1
+ku 1
+kv 1
+kw 1
+kx 1
+ky 1
+kz 1
+l 1
+la 1
+lb 1
+lc 1
+ld 1
+le 1
+lf 1
+lg 1
+lh 1
+li 1
+lj 1
+lk 1
+ll 1
+lm 1
+ln 1
+m 1
+n 1
+o 1
+p 1
+q 1
+r 1
+s 1
+t 1
+u 1
+v 1
+w 1
+x 1
+y 1
+z 1
+PREHOOK: query: EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cvarchar (type: varchar(10))
+ outputColumnNames: cvarchar
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: cvarchar (type: varchar(10))
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: varchar(10))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: varchar(10))
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: varchar(10))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: varchar(10))
+ sort order: +
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+ 1
+b 1
+ba 1
+bb 1
+bc 1
+bd 1
+be 1
+bf 1
+bg 1
+bh 1
+bi 1
+bj 1
+bk 1
+bl 1
+bm 1
+bn 1
+bo 1
+bp 1
+bq 1
+br 1
+bs 1
+bt 1
+bu 1
+bv 1
+bw 1
+bx 1
+by 1
+bz 1
+c 1
+ca 1
+cb 1
+cc 1
+cd 1
+ce 1
+cf 1
+cg 1
+ch 1
+ci 1
+cj 1
+ck 1
+cl 1
+cm 1
+cn 1
+co 1
+cp 1
+cq 1
+cr 1
+cs 1
+ct 1
+cu 1
+cv 1
+cw 1
+cx 1
+cy 1
+cz 1
+d 1
+da 1
+db 1
+dc 1
+dd 1
+de 1
+df 1
+dg 1
+dh 1
+di 1
+dj 1
+dk 1
+dl 1
+dm 1
+dn 1
+do 1
+dp 1
+dq 1
+dr 1
+ds 1
+dt 1
+du 1
+dv 1
+dw 1
+dx 1
+dy 1
+dz 1
+e 1
+ea 1
+eb 1
+ec 1
+ed 1
+ee 1
+ef 1
+eg 1
+eh 1
+ei 1
+ej 1
+ek 1
+el 1
+em 1
+en 1
+eo 1
+ep 1
+eq 1
+er 1
+es 1
+et 1
+eu 1
+ev 1
+ew 1
+ex 1
+ey 1
+ez 1
+f 1
+fa 1
+fb 1
+fc 1
+fd 1
+fe 1
+ff 1
+fg 1
+fh 1
+fi 1
+fj 1
+fk 1
+fl 1
+fm 1
+fn 1
+fo 1
+fp 1
+fq 1
+fr 1
+fs 1
+ft 1
+fu 1
+fv 1
+fw 1
+fx 1
+fy 1
+fz 1
+g 1
+ga 1
+gb 1
+gc 1
+gd 1
+ge 1
+gf 1
+gg 1
+gh 1
+gi 1
+gj 1
+gk 1
+gl 1
+gm 1
+gn 1
+go 1
+gp 1
+gq 1
+gr 1
+gs 1
+gt 1
+gu 1
+gv 1
+gw 1
+gx 1
+gy 1
+gz 1
+h 1
+ha 1
+hb 1
+hc 1
+hd 1
+he 1
+hf 1
+hg 1
+hh 1
+hi 1
+hj 1
+hk 1
+hl 1
+hm 1
+hn 1
+ho 1
+hp 1
+hq 1
+hr 1
+hs 1
+ht 1
+hu 1
+hv 1
+hw 1
+hx 1
+hy 1
+hz 1
+i 1
+ia 1
+ib 1
+ic 1
+id 1
+ie 1
+if 1
+ig 1
+ih 1
+ii 1
+ij 1
+ik 1
+il 1
+im 1
+in 1
+io 1
+ip 1
+iq 1
+ir 1
+is 1
+it 1
+iu 1
+iv 1
+iw 1
+ix 1
+iy 1
+iz 1
+j 1
+ja 1
+jb 1
+jc 1
+jd 1
+je 1
+jf 1
+jg 1
+jh 1
+ji 1
+jj 1
+jk 1
+jl 1
+jm 1
+jn 1
+jo 1
+jp 1
+jq 1
+jr 1
+js 1
+jt 1
+ju 1
+jv 1
+jw 1
+jx 1
+jy 1
+jz 1
+k 1
+ka 1
+kb 1
+kc 1
+kd 1
+ke 1
+kf 1
+kg 1
+kh 1
+ki 1
+kj 1
+kk 1
+kl 1
+km 1
+kn 1
+ko 1
+kp 1
+kq 1
+kr 1
+ks 1
+kt 1
+ku 1
+kv 1
+kw 1
+kx 1
+ky 1
+kz 1
+l 1
+la 1
+lb 1
+lc 1
+ld 1
+le 1
+lf 1
+lg 1
+lh 1
+li 1
+lj 1
+lk 1
+ll 1
+lm 1
+ln 1
+m 1
+n 1
+o 1
+p 1
+q 1
+r 1
+s 1
+t 1
+u 1
+v 1
+w 1
+x 1
+y 1
+z 1
+PREHOOK: query: EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cstring1 (type: string)
+ outputColumnNames: cstring1
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: cstring1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+ 1
+b 1
+ba 1
+bb 1
+bc 1
+bd 1
+be 1
+bf 1
+bg 1
+bh 1
+bi 1
+bj 1
+bk 1
+bl 1
+bm 1
+bn 1
+bo 1
+bp 1
+bq 1
+br 1
+bs 1
+bt 1
+bu 1
+bv 1
+bw 1
+bx 1
+by 1
+bz 1
+c 1
+ca 1
+cb 1
+cc 1
+cd 1
+ce 1
+cf 1
+cg 1
+ch 1
+ci 1
+cj 1
+ck 1
+cl 1
+cm 1
+cn 1
+co 1
+cp 1
+cq 1
+cr 1
+cs 1
+ct 1
+cu 1
+cv 1
+cw 1
+cx 1
+cy 1
+cz 1
+d 1
+da 1
+db 1
+dc 1
+dd 1
+de 1
+df 1
+dg 1
+dh 1
+di 1
+dj 1
+dk 1
+dl 1
+dm 1
+dn 1
+do 1
+dp 1
+dq 1
+dr 1
+ds 1
+dt 1
+du 1
+dv 1
+dw 1
+dx 1
+dy 1
+dz 1
+e 1
+ea 1
+eb 1
+ec 1
+ed 1
+ee 1
+ef 1
+eg 1
+eh 1
+ei 1
+ej 1
+ek 1
+el 1
+em 1
+en 1
+eo 1
+ep 1
+eq 1
+er 1
+es 1
+et 1
+eu 1
+ev 1
+ew 1
+ex 1
+ey 1
+ez 1
+f 1
+fa 1
+fb 1
+fc 1
+fd 1
+fe 1
+ff 1
+fg 1
+fh 1
+fi 1
+fj 1
+fk 1
+fl 1
+fm 1
+fn 1
+fo 1
+fp 1
+fq 1
+fr 1
+fs 1
+ft 1
+fu 1
+fv 1
+fw 1
+fx 1
+fy 1
+fz 1
+g 1
+ga 1
+gb 1
+gc 1
+gd 1
+ge 1
+gf 1
+gg 1
+gh 1
+gi 1
+gj 1
+gk 1
+gl 1
+gm 1
+gn 1
+go 1
+gp 1
+gq 1
+gr 1
+gs 1
+gt 1
+gu 1
+gv 1
+gw 1
+gx 1
+gy 1
+gz 1
+h 1
+ha 1
+hb 1
+hc 1
+hd 1
+he 1
+hf 1
+hg 1
+hh 1
+hi 1
+hj 1
+hk 1
+hl 1
+hm 1
+hn 1
+ho 1
+hp 1
+hq 1
+hr 1
+hs 1
+ht 1
+hu 1
+hv 1
+hw 1
+hx 1
+hy 1
+hz 1
+i 1
+ia 1
+ib 1
+ic 1
+id 1
+ie 1
+if 1
+ig 1
+ih 1
+ii 1
+ij 1
+ik 1
+il 1
+im 1
+in 1
+io 1
+ip 1
+iq 1
+ir 1
+is 1
+it 1
+iu 1
+iv 1
+iw 1
+ix 1
+iy 1
+iz 1
+j 1
+ja 1
+jb 1
+jc 1
+jd 1
+je 1
+jf 1
+jg 1
+jh 1
+ji 1
+jj 1
+jk 1
+jl 1
+jm 1
+jn 1
+jo 1
+jp 1
+jq 1
+jr 1
+js 1
+jt 1
+ju 1
+jv 1
+jw 1
+jx 1
+jy 1
+jz 1
+k 1
+ka 1
+kb 1
+kc 1
+kd 1
+ke 1
+kf 1
+kg 1
+kh 1
+ki 1
+kj 1
+kk 1
+kl 1
+km 1
+kn 1
+ko 1
+kp 1
+kq 1
+kr 1
+ks 1
+kt 1
+ku 1
+kv 1
+kw 1
+kx 1
+ky 1
+kz 1
+l 1
+la 1
+lb 1
+lc 1
+ld 1
+le 1
+lf 1
+lg 1
+lh 1
+li 1
+lj 1
+lk 1
+ll 1
+lm 1
+ln 1
+m 1
+n 1
+o 1
+p 1
+q 1
+r 1
+s 1
+t 1
+u 1
+v 1
+w 1
+x 1
+y 1
+z 1
+PREHOOK: query: EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: parquet_types
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbinary (type: binary)
+ outputColumnNames: cbinary
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: cbinary (type: binary)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: binary)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: binary)
+ Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: binary)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hex(_col0) (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+NULL 264
+ 1
+0B 1
+0C 1
+0D 1
+0E 1
+0F 1
+BA 1
+BB 1
+BC 1
+BD 1
+BE 1
+BF 1
+CA 1
+CB 1
+CC 1
+CD 1
+CE 1
+CF 1
+DA 1
+DB 1
+DC 1
+DD 1
+DE 1
+DF 1
+EA 1
+EB 1
+EC 1
+ED 1
+EE 1
+EF 1
+FA 1
+FB 1
+FC 1
+FD 1
+FE 1
+FF 1
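
A minimal sketch of the kind of interactive session that reproduces one of the vectorized
plans shown above (hedged: the parquet_types_vectorization.q test file drives its own
settings; hive.vectorized.execution.enabled is simply the standard Hive switch for the
vectorized path):

  -- hypothetical interactive session, not part of the committed golden file
  SET hive.vectorized.execution.enabled=true;
  EXPLAIN SELECT ctinyint, MAX(cint), MIN(csmallint), COUNT(cstring1),
                 ROUND(AVG(cfloat), 5), ROUND(STDDEV_POP(cdouble), 5)
  FROM parquet_types
  GROUP BY ctinyint
  ORDER BY ctinyint;
  -- with the flag on, the map stage of the plan reports "Execution mode: vectorized"
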
[3/4] hive git commit: HIVE-14815: Implement Parquet vectorization
reader for Primitive types(Ferdinand Xu, review by Chao Sun) This closes #104
Posted by xu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
new file mode 100644
index 0000000..f94c49a
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
@@ -0,0 +1,289 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.vector;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.io.IOConstants;
+import org.apache.hadoop.hive.ql.io.parquet.ParquetRecordReaderBase;
+import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
+import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.page.PageReadStore;
+import org.apache.parquet.filter2.compat.FilterCompat;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.ParquetInputSplit;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.Type;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.apache.parquet.filter2.compat.RowGroupFilter.filterRowGroups;
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER;
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.range;
+import static org.apache.parquet.hadoop.ParquetFileReader.readFooter;
+import static org.apache.parquet.hadoop.ParquetInputFormat.getFilter;
+
+/**
+ * This reader reads a batch of records from an InputSplit; part of the code is adapted
+ * from Apache Spark and Apache Parquet.
+ */
+public class VectorizedParquetRecordReader extends ParquetRecordReaderBase
+ implements RecordReader<NullWritable, VectorizedRowBatch> {
+ public static final Logger LOG = LoggerFactory.getLogger(VectorizedParquetRecordReader.class);
+
+ private List<Integer> colsToInclude;
+
+ protected MessageType fileSchema;
+ protected MessageType requestedSchema;
+ private List<String> columnNamesList;
+ private List<TypeInfo> columnTypesList;
+ private VectorizedRowBatchCtx rbCtx;
+
+ /**
+ * For each requested column, the reader used to read that column. The entry is null if the
+ * column is missing from the file, in which case the column is populated with nulls.
+ */
+ private VectorizedColumnReader[] columnReaders;
+
+ /**
+ * The number of rows that have been returned.
+ */
+ private long rowsReturned;
+
+ /**
+ * The number of rows that have been loaded so far, including the current in-flight row group.
+ */
+ private long totalCountLoadedSoFar = 0;
+
+ /**
+ * The total number of rows this RecordReader will eventually read: the sum of the
+ * row counts of all the selected row groups.
+ */
+ protected long totalRowCount;
+
+ @VisibleForTesting
+ public VectorizedParquetRecordReader(
+ InputSplit inputSplit,
+ JobConf conf) {
+ try {
+ serDeStats = new SerDeStats();
+ projectionPusher = new ProjectionPusher();
+ initialize(inputSplit, conf);
+ colsToInclude = ColumnProjectionUtils.getReadColumnIDs(conf);
+ rbCtx = Utilities.getVectorizedRowBatchCtx(conf);
+ } catch (Throwable e) {
+ LOG.error("Failed to create the vectorized reader due to exception " + e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ public VectorizedParquetRecordReader(
+ org.apache.hadoop.mapred.InputSplit oldInputSplit,
+ JobConf conf) {
+ try {
+ serDeStats = new SerDeStats();
+ projectionPusher = new ProjectionPusher();
+ initialize(getSplit(oldInputSplit, conf), conf);
+ colsToInclude = ColumnProjectionUtils.getReadColumnIDs(conf);
+ rbCtx = Utilities.getVectorizedRowBatchCtx(conf);
+ } catch (Throwable e) {
+ LOG.error("Failed to create the vectorized reader due to exception " + e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ public void initialize(
+ InputSplit oldSplit,
+ JobConf configuration) throws IOException, InterruptedException {
+ jobConf = configuration;
+ ParquetMetadata footer;
+ List<BlockMetaData> blocks;
+ ParquetInputSplit split = (ParquetInputSplit) oldSplit;
+ boolean indexAccess =
+ configuration.getBoolean(DataWritableReadSupport.PARQUET_COLUMN_INDEX_ACCESS, false);
+ this.file = split.getPath();
+ long[] rowGroupOffsets = split.getRowGroupOffsets();
+
+ String columnNames = configuration.get(IOConstants.COLUMNS);
+ columnNamesList = DataWritableReadSupport.getColumnNames(columnNames);
+ String columnTypes = configuration.get(IOConstants.COLUMNS_TYPES);
+ columnTypesList = DataWritableReadSupport.getColumnTypes(columnTypes);
+
+ // if task.side.metadata is set, rowGroupOffsets is null
+ if (rowGroupOffsets == null) {
+ //TODO check whether rowGroupOffsets can be null
+ // then we need to apply the predicate push down filter
+ footer = readFooter(configuration, file, range(split.getStart(), split.getEnd()));
+ MessageType fileSchema = footer.getFileMetaData().getSchema();
+ FilterCompat.Filter filter = getFilter(configuration);
+ blocks = filterRowGroups(filter, footer.getBlocks(), fileSchema);
+ } else {
+ // otherwise we find the row groups that were selected on the client
+ footer = readFooter(configuration, file, NO_FILTER);
+ Set<Long> offsets = new HashSet<>();
+ for (long offset : rowGroupOffsets) {
+ offsets.add(offset);
+ }
+ blocks = new ArrayList<>();
+ for (BlockMetaData block : footer.getBlocks()) {
+ if (offsets.contains(block.getStartingPos())) {
+ blocks.add(block);
+ }
+ }
+ // verify we found them all
+ if (blocks.size() != rowGroupOffsets.length) {
+ long[] foundRowGroupOffsets = new long[footer.getBlocks().size()];
+ for (int i = 0; i < foundRowGroupOffsets.length; i++) {
+ foundRowGroupOffsets[i] = footer.getBlocks().get(i).getStartingPos();
+ }
+ // this should never happen.
+ // provide a good error message in case there's a bug
+ throw new IllegalStateException(
+ "All the offsets listed in the split should be found in the file."
+ + " expected: " + Arrays.toString(rowGroupOffsets)
+ + " found: " + blocks
+ + " out of: " + Arrays.toString(foundRowGroupOffsets)
+ + " in range " + split.getStart() + ", " + split.getEnd());
+ }
+ }
+
+ for (BlockMetaData block : blocks) {
+ this.totalRowCount += block.getRowCount();
+ }
+ this.fileSchema = footer.getFileMetaData().getSchema();
+
+ MessageType tableSchema;
+ if (indexAccess) {
+ List<Integer> indexSequence = new ArrayList<>();
+
+ // Generate a sequential list of column indexes
+ for(int i = 0; i < columnNamesList.size(); i++) {
+ indexSequence.add(i);
+ }
+
+ tableSchema = DataWritableReadSupport.getSchemaByIndex(fileSchema, columnNamesList,
+ indexSequence);
+ } else {
+ tableSchema = DataWritableReadSupport.getSchemaByName(fileSchema, columnNamesList,
+ columnTypesList);
+ }
+
+ List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration);
+ if (!ColumnProjectionUtils.isReadAllColumns(configuration) && !indexColumnsWanted.isEmpty()) {
+ requestedSchema =
+ DataWritableReadSupport.getSchemaByIndex(tableSchema, columnNamesList, indexColumnsWanted);
+ } else {
+ requestedSchema = fileSchema;
+ }
+
+ this.reader = new ParquetFileReader(
+ configuration, footer.getFileMetaData(), file, blocks, requestedSchema.getColumns());
+ }
+
+ @Override
+ public boolean next(
+ NullWritable nullWritable,
+ VectorizedRowBatch vectorizedRowBatch) throws IOException {
+ return nextBatch(vectorizedRowBatch);
+ }
+
+ @Override
+ public NullWritable createKey() {
+ return NullWritable.get();
+ }
+
+ @Override
+ public VectorizedRowBatch createValue() {
+ return rbCtx.createVectorizedRowBatch();
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ //TODO
+ return 0;
+ }
+
+ @Override
+ public void close() throws IOException {
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ //TODO
+ return 0;
+ }
+
+ /**
+ * Advances to the next batch of rows. Returns false if there are no more.
+ */
+ private boolean nextBatch(VectorizedRowBatch columnarBatch) throws IOException {
+ columnarBatch.reset();
+ if (rowsReturned >= totalRowCount) {
+ return false;
+ }
+ checkEndOfRowGroup();
+
+ int num = (int) Math.min(VectorizedRowBatch.DEFAULT_SIZE, totalCountLoadedSoFar - rowsReturned);
+ for (int i = 0; i < columnReaders.length; ++i) {
+ if (columnReaders[i] == null) {
+ continue;
+ }
+ columnarBatch.cols[colsToInclude.get(i)].isRepeating = true;
+ columnReaders[i].readBatch(num, columnarBatch.cols[colsToInclude.get(i)],
+ columnTypesList.get(colsToInclude.get(i)));
+ }
+ rowsReturned += num;
+ columnarBatch.size = num;
+ return true;
+ }
+
+ private void checkEndOfRowGroup() throws IOException {
+ if (rowsReturned != totalCountLoadedSoFar) {
+ return;
+ }
+ PageReadStore pages = reader.readNextRowGroup();
+ if (pages == null) {
+ throw new IOException("expecting more rows but reached last block. Read "
+ + rowsReturned + " out of " + totalRowCount);
+ }
+ List<ColumnDescriptor> columns = requestedSchema.getColumns();
+ List<Type> types = requestedSchema.getFields();
+ columnReaders = new VectorizedColumnReader[columns.size()];
+ for (int i = 0; i < columns.size(); ++i) {
+ columnReaders[i] =
+ new VectorizedColumnReader(columns.get(i), pages.getPageReader(columns.get(i)),
+ skipTimestampConversion, types.get(i));
+ }
+ totalCountLoadedSoFar += pages.getRowCount();
+ }
+}
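
For readers skimming the diff, below is a minimal editorial sketch (not part of this patch) of driving the new reader directly through its mapred RecordReader surface. The conf keys and the row-batch-context wiring mirror TestVectorizedColumnReader further down; the sketch class name, the file path /tmp/parquet_types (assumed to hold a single int32 column named cint), and the use of a plain FileSplit with the mapred-side constructor are assumptions, and error handling is omitted.

// Editorial sketch, not part of the patch: read one projected int column from an
// existing Parquet file with VectorizedParquetRecordReader. The file path and the
// single "cint" column are assumptions; the setup mirrors TestVectorizedColumnReader.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector;
import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;

public class VectorizedParquetReaderSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Declare the table columns and project only column 0.
    conf.set(IOConstants.COLUMNS, "cint");
    conf.set(IOConstants.COLUMNS_TYPES, "int");
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
    HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "/tmp"); // plan path stub, as in the test

    // The reader pulls its VectorizedRowBatchCtx from the MapWork attached to the conf.
    StructTypeInfo rowType = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
        DataWritableReadSupport.getColumnNames(conf.get(IOConstants.COLUMNS)),
        DataWritableReadSupport.getColumnTypes(conf.get(IOConstants.COLUMNS_TYPES)));
    VectorizedRowBatchCtx rbCtx = new VectorizedRowBatchCtx();
    rbCtx.init(new ArrayWritableObjectInspector(rowType), new String[0]);
    MapWork mapWork = new MapWork();
    mapWork.setVectorMode(true);
    mapWork.setVectorizedRowBatchCtx(rbCtx);
    Utilities.setMapWork(conf, mapWork);

    // Hypothetical Parquet file whose first column is an int32; one split over the whole file.
    Path file = new Path("/tmp/parquet_types");
    long length = file.getFileSystem(conf).getFileStatus(file).getLen();
    FileSplit split = new FileSplit(file, 0, length, new String[0]);

    VectorizedParquetRecordReader reader =
        new VectorizedParquetRecordReader(split, new JobConf(conf));
    VectorizedRowBatch batch = reader.createValue();
    try {
      // Each next() call fills at most one batch and returns false once the file is exhausted.
      while (reader.next(NullWritable.get(), batch)) {
        LongColumnVector cint = (LongColumnVector) batch.cols[0];
        for (int i = 0; i < batch.size; i++) {
          System.out.println(cint.vector[i]);
        }
      }
    } finally {
      reader.close();
    }
  }
}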
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
new file mode 100644
index 0000000..276ff19
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
@@ -0,0 +1,429 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.io.IOConstants;
+import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
+import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector;
+import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.parquet.example.data.Group;
+import org.apache.parquet.example.data.simple.SimpleGroupFactory;
+import org.apache.parquet.hadoop.ParquetInputFormat;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.hadoop.example.GroupReadSupport;
+import org.apache.parquet.hadoop.example.GroupWriteSupport;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.MessageType;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Random;
+
+import static junit.framework.Assert.assertTrue;
+import static junit.framework.TestCase.assertFalse;
+import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_1_0;
+import static org.apache.parquet.hadoop.api.ReadSupport.PARQUET_READ_SCHEMA;
+import static org.apache.parquet.hadoop.metadata.CompressionCodecName.GZIP;
+import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
+import static org.junit.Assert.assertEquals;
+
+public class TestVectorizedColumnReader {
+
+ private static final int nElements = 2500;
+ protected static final Configuration conf = new Configuration();
+ protected static final Path file =
+ new Path("target/test/TestParquetVectorReader/testParquetFile");
+ private static String[] uniqueStrs = new String[nElements];
+ private static boolean[] isNulls = new boolean[nElements];
+ private static Random random = new Random();
+ protected static final MessageType schema = parseMessageType(
+ "message test { "
+ + "required int32 int32_field; "
+ + "required int64 int64_field; "
+ + "required int96 int96_field; "
+ + "required double double_field; "
+ + "required float float_field; "
+ + "required boolean boolean_field; "
+ + "required fixed_len_byte_array(3) flba_field; "
+ + "optional fixed_len_byte_array(1) some_null_field; "
+ + "optional fixed_len_byte_array(1) all_null_field; "
+ + "optional binary binary_field; "
+ + "optional binary binary_field_non_repeating; "
+ + "} ");
+
+ @AfterClass
+ public static void cleanup() throws IOException {
+ FileSystem fs = file.getFileSystem(conf);
+ if (fs.exists(file)) {
+ fs.delete(file, true);
+ }
+ }
+
+ @BeforeClass
+ public static void prepareFile() throws IOException {
+ cleanup();
+
+ boolean dictionaryEnabled = true;
+ boolean validating = false;
+ GroupWriteSupport.setSchema(schema, conf);
+ SimpleGroupFactory f = new SimpleGroupFactory(schema);
+ ParquetWriter<Group> writer = new ParquetWriter<Group>(
+ file,
+ new GroupWriteSupport(),
+ GZIP, 1024*1024, 1024, 1024*1024,
+ dictionaryEnabled, validating, PARQUET_1_0, conf);
+ writeData(f, writer);
+ }
+
+ protected static void writeData(SimpleGroupFactory f, ParquetWriter<Group> writer) throws IOException {
+ initialStrings(uniqueStrs);
+ for (int i = 0; i < nElements; i++) {
+ Group group = f.newGroup()
+ .append("int32_field", i)
+ .append("int64_field", (long) 2 * i)
+ .append("int96_field", Binary.fromReusedByteArray("999999999999".getBytes()))
+ .append("double_field", i * 1.0)
+ .append("float_field", ((float) (i * 2.0)))
+ .append("boolean_field", i % 5 == 0)
+ .append("flba_field", "abc");
+
+ if (i % 2 == 1) {
+ group.append("some_null_field", "x");
+ }
+
+ if (i % 13 != 1) {
+ int binaryLen = i % 10;
+ group.append("binary_field",
+ Binary.fromString(new String(new char[binaryLen]).replace("\0", "x")));
+ }
+
+ if (uniqueStrs[i] != null) {
+ group.append("binary_field_non_repeating", Binary.fromString(uniqueStrs[i]));
+ }
+ writer.write(group);
+ }
+ writer.close();
+ }
+
+ private static String getRandomStr() {
+ int len = random.nextInt(10);
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < len; i++) {
+ sb.append((char) ('a' + random.nextInt(25)));
+ }
+ return sb.toString();
+ }
+
+ public static void initialStrings(String[] uniqueStrs) {
+ for (int i = 0; i < uniqueStrs.length; i++) {
+ String str = getRandomStr();
+ if (!str.isEmpty()) {
+ uniqueStrs[i] = str;
+ isNulls[i] = false;
+ }else{
+ isNulls[i] = true;
+ }
+ }
+ }
+
+ private VectorizedParquetRecordReader createParquetReader(String schemaString, Configuration conf)
+ throws IOException, InterruptedException, HiveException {
+ conf.set(PARQUET_READ_SCHEMA, schemaString);
+ HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
+ HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "//tmp");
+
+ Job vectorJob = new Job(conf, "read vector");
+ ParquetInputFormat.setInputPaths(vectorJob, file);
+ ParquetInputFormat parquetInputFormat = new ParquetInputFormat(GroupReadSupport.class);
+ InputSplit split = (InputSplit) parquetInputFormat.getSplits(vectorJob).get(0);
+ initialVectorizedRowBatchCtx(conf);
+ return new VectorizedParquetRecordReader(split, new JobConf(conf));
+ }
+
+ private void initialVectorizedRowBatchCtx(Configuration conf) throws HiveException {
+ MapWork mapWork = new MapWork();
+ VectorizedRowBatchCtx rbCtx = new VectorizedRowBatchCtx();
+ rbCtx.init(createStructObjectInspector(conf), new String[0]);
+ mapWork.setVectorMode(true);
+ mapWork.setVectorizedRowBatchCtx(rbCtx);
+ Utilities.setMapWork(conf, mapWork);
+ }
+
+ private StructObjectInspector createStructObjectInspector(Configuration conf) {
+ // Create row related objects
+ String columnNames = conf.get(IOConstants.COLUMNS);
+ List<String> columnNamesList = DataWritableReadSupport.getColumnNames(columnNames);
+ String columnTypes = conf.get(IOConstants.COLUMNS_TYPES);
+ List<TypeInfo> columnTypesList = DataWritableReadSupport.getColumnTypes(columnTypes);
+ TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNamesList, columnTypesList);
+ return new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);
+ }
+
+ @Test
+ public void testIntRead() throws Exception {
+ Configuration conf = new Configuration();
+ conf.set(IOConstants.COLUMNS,"int32_field");
+ conf.set(IOConstants.COLUMNS_TYPES,"int");
+ conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+ conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+ VectorizedParquetRecordReader reader =
+ createParquetReader("message test { required int32 int32_field;}", conf);
+ VectorizedRowBatch previous = reader.createValue();
+ try {
+ long c = 0;
+ while (reader.next(NullWritable.get(), previous)) {
+ LongColumnVector vector = (LongColumnVector) previous.cols[0];
+ assertTrue(vector.noNulls);
+ for (int i = 0; i < vector.vector.length; i++) {
+ if(c == nElements){
+ break;
+ }
+ assertEquals(c, vector.vector[i]);
+ assertFalse(vector.isNull[i]);
+ c++;
+ }
+ }
+ assertEquals(nElements, c);
+ } finally {
+ reader.close();
+ }
+ }
+
+ @Test
+ public void testLongRead() throws Exception {
+ Configuration conf = new Configuration();
+ conf.set(IOConstants.COLUMNS,"int64_field");
+ conf.set(IOConstants.COLUMNS_TYPES, "bigint");
+ conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+ conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+ VectorizedParquetRecordReader reader =
+ createParquetReader("message test { required int64 int64_field;}", conf);
+ VectorizedRowBatch previous = reader.createValue();
+ try {
+ long c = 0;
+ while (reader.next(NullWritable.get(), previous)) {
+ LongColumnVector vector = (LongColumnVector) previous.cols[0];
+ assertTrue(vector.noNulls);
+ for (int i = 0; i < vector.vector.length; i++) {
+ if(c == nElements){
+ break;
+ }
+ assertEquals(2 * c, vector.vector[i]);
+ assertFalse(vector.isNull[i]);
+ c++;
+ }
+ }
+ assertEquals(nElements, c);
+ } finally {
+ reader.close();
+ }
+ }
+
+ @Test
+ public void testDoubleRead() throws Exception {
+ Configuration conf = new Configuration();
+ conf.set(IOConstants.COLUMNS,"double_field");
+ conf.set(IOConstants.COLUMNS_TYPES, "double");
+ conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+ conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+ VectorizedParquetRecordReader reader =
+ createParquetReader("message test { required double double_field;}", conf);
+ VectorizedRowBatch previous = reader.createValue();
+ try {
+ long c = 0;
+ while (reader.next(NullWritable.get(), previous)) {
+ DoubleColumnVector vector = (DoubleColumnVector) previous.cols[0];
+ assertTrue(vector.noNulls);
+ for (int i = 0; i < vector.vector.length; i++) {
+ if(c == nElements){
+ break;
+ }
+ assertEquals(1.0 * c, vector.vector[i], 0);
+ assertFalse(vector.isNull[i]);
+ c++;
+ }
+ }
+ assertEquals(nElements, c);
+ } finally {
+ reader.close();
+ }
+ }
+
+ @Test
+ public void testFloatRead() throws Exception {
+ Configuration conf = new Configuration();
+ conf.set(IOConstants.COLUMNS,"float_field");
+ conf.set(IOConstants.COLUMNS_TYPES, "float");
+ conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+ conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+ VectorizedParquetRecordReader reader =
+ createParquetReader("message test { required float float_field;}", conf);
+ VectorizedRowBatch previous = reader.createValue();
+ try {
+ long c = 0;
+ while (reader.next(NullWritable.get(), previous)) {
+ DoubleColumnVector vector = (DoubleColumnVector) previous.cols[0];
+ assertTrue(vector.noNulls);
+ for (int i = 0; i < vector.vector.length; i++) {
+ if(c == nElements){
+ break;
+ }
+ assertEquals((float)2.0 * c, vector.vector[i], 0);
+ assertFalse(vector.isNull[i]);
+ c++;
+ }
+ }
+ assertEquals(nElements, c);
+ } finally {
+ reader.close();
+ }
+ }
+
+ @Test
+ public void testBooleanRead() throws Exception {
+ Configuration conf = new Configuration();
+ conf.set(IOConstants.COLUMNS,"boolean_field");
+ conf.set(IOConstants.COLUMNS_TYPES, "boolean");
+ conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+ conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+ VectorizedParquetRecordReader reader =
+ createParquetReader("message test { required boolean boolean_field;}", conf);
+ VectorizedRowBatch previous = reader.createValue();
+ try {
+ long c = 0;
+ while (reader.next(NullWritable.get(), previous)) {
+ LongColumnVector vector = (LongColumnVector) previous.cols[0];
+ assertTrue(vector.noNulls);
+ for (int i = 0; i < vector.vector.length; i++) {
+ if(c == nElements){
+ break;
+ }
+ int e = (c % 5 == 0) ? 1 : 0;
+ assertEquals(e, vector.vector[i]);
+ assertFalse(vector.isNull[i]);
+ c++;
+ }
+ }
+ assertEquals(nElements, c);
+ } finally {
+ reader.close();
+ }
+ }
+
+ @Test
+ public void testBinaryReadDictionaryEncoding() throws Exception {
+ Configuration conf = new Configuration();
+ conf.set(IOConstants.COLUMNS,"binary_field");
+ conf.set(IOConstants.COLUMNS_TYPES, "string");
+ conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+ conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+ VectorizedParquetRecordReader reader =
+ createParquetReader("message test { required binary binary_field;}", conf);
+ VectorizedRowBatch previous = reader.createValue();
+ int c = 0;
+ try {
+ while (reader.next(NullWritable.get(), previous)) {
+ BytesColumnVector vector = (BytesColumnVector) previous.cols[0];
+ boolean noNull = true;
+ for (int i = 0; i < vector.vector.length; i++) {
+ if(c == nElements){
+ break;
+ }
+ if (c % 13 == 1) {
+ assertTrue(vector.isNull[i]);
+ } else {
+ assertFalse(vector.isNull[i]);
+ int binaryLen = c % 10;
+ String expected = new String(new char[binaryLen]).replace("\0", "x");
+ String actual = new String(ArrayUtils
+ .subarray(vector.vector[i], vector.start[i], vector.start[i] + vector.length[i]));
+ assertEquals("Failed at " + c, expected, actual);
+ noNull = false;
+ }
+ c++;
+ }
+ assertEquals("No Null check failed at " + c, noNull, vector.noNulls);
+ assertFalse(vector.isRepeating);
+ }
+ assertEquals(nElements, c);
+ } finally {
+ reader.close();
+ }
+ }
+
+ @Test
+ public void testBinaryRead() throws Exception {
+ Configuration conf = new Configuration();
+ conf.set(IOConstants.COLUMNS,"binary_field_non_repeating");
+ conf.set(IOConstants.COLUMNS_TYPES, "string");
+ conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+ conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+ VectorizedParquetRecordReader reader =
+ createParquetReader("message test { required binary binary_field_non_repeating;}", conf);
+ VectorizedRowBatch previous = reader.createValue();
+ int c = 0;
+ try {
+ while (reader.next(NullWritable.get(), previous)) {
+ BytesColumnVector vector = (BytesColumnVector) previous.cols[0];
+ boolean noNull = true;
+ for (int i = 0; i < vector.vector.length; i++) {
+ if(c == nElements){
+ break;
+ }
+ String actual;
+ assertEquals("Null assert failed at " + c, isNulls[c], vector.isNull[i]);
+ if (!vector.isNull[i]) {
+ actual = new String(ArrayUtils
+ .subarray(vector.vector[i], vector.start[i], vector.start[i] + vector.length[i]));
+ assertEquals("failed at " + c, uniqueStrs[c], actual);
+ }else{
+ noNull = false;
+ }
+ c++;
+ }
+ assertEquals("No Null check failed at " + c, noNull, vector.noNulls);
+ assertFalse(vector.isRepeating);
+ }
+ assertEquals("It doesn't exit at expected position", nElements, c);
+ } finally {
+ reader.close();
+ }
+ }
+}
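
As a further illustration, one way to exercise multi-column projection (the tests above project a single column at a time) would be a test along the following lines inside TestVectorizedColumnReader. It reuses the class's createParquetReader() helper and the file prepared in prepareFile(); the method name and the two-column projection itself are hypothetical additions, not part of this patch.

  // Hypothetical addition to TestVectorizedColumnReader: read two projected columns
  // (int32_field and double_field) in one pass and check both vectors row by row.
  @Test
  public void testMultiColumnProjectionRead() throws Exception {
    Configuration conf = new Configuration();
    conf.set(IOConstants.COLUMNS, "int32_field,double_field");
    conf.set(IOConstants.COLUMNS_TYPES, "int,double");
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,1");
    VectorizedParquetRecordReader reader = createParquetReader(
        "message test { required int32 int32_field; required double double_field;}", conf);
    VectorizedRowBatch previous = reader.createValue();
    try {
      long c = 0;
      while (reader.next(NullWritable.get(), previous)) {
        LongColumnVector ints = (LongColumnVector) previous.cols[0];
        DoubleColumnVector doubles = (DoubleColumnVector) previous.cols[1];
        // previous.size is the number of valid rows in this batch.
        for (int i = 0; i < previous.size; i++) {
          assertEquals(c, ints.vector[i]);
          assertEquals(1.0 * c, doubles.vector[i], 0);
          c++;
        }
      }
      assertEquals(nElements, c);
    } finally {
      reader.close();
    }
  }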
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/test/queries/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q b/ql/src/test/queries/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q
new file mode 100644
index 0000000..7de444f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q
@@ -0,0 +1,94 @@
+set hive.mapred.mode=nonstrict;
+DROP TABLE parquet_types_staging;
+DROP TABLE parquet_types;
+
+set hive.vectorized.execution.enabled=true;
+set hive.vectorized.execution.reduce.enabled=true;
+set hive.vectorized.use.row.serde.deserialize=true;
+set hive.vectorized.use.vector.serde.deserialize=true;
+set hive.vectorized.execution.reduce.groupby.enabled = true;
+
+CREATE TABLE parquet_types_staging (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string,
+ t timestamp,
+ cchar char(5),
+ cvarchar varchar(10),
+ cbinary string,
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>,
+ d date
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':';
+
+CREATE TABLE parquet_types (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string,
+ t timestamp,
+ cchar char(5),
+ cvarchar varchar(10),
+ cbinary binary,
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>,
+ d date
+) STORED AS PARQUET;
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_non_dictionary_types.txt' OVERWRITE INTO TABLE
+parquet_types_staging;
+
+SELECT * FROM parquet_types_staging;
+
+INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging;
+
+-- test types in group by
+
+EXPLAIN SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ ROUND(AVG(cfloat), 5),
+ ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+;
+
+SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ ROUND(AVG(cfloat), 5),
+ ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+;
+
+EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat;
+SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat;
+
+EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar;
+SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar;
+
+EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar;
+SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar;
+
+EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1;
+SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1;
+
+EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary;
+SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/test/queries/clientpositive/parquet_types_vectorization.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_types_vectorization.q b/ql/src/test/queries/clientpositive/parquet_types_vectorization.q
new file mode 100644
index 0000000..bb0e5b2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_types_vectorization.q
@@ -0,0 +1,96 @@
+set hive.mapred.mode=nonstrict;
+DROP TABLE parquet_types_staging;
+DROP TABLE parquet_types;
+
+set hive.vectorized.execution.enabled=true;
+set hive.vectorized.execution.reduce.enabled=true;
+set hive.vectorized.use.row.serde.deserialize=true;
+set hive.vectorized.use.vector.serde.deserialize=true;
+set hive.vectorized.execution.reduce.groupby.enabled = true;
+
+CREATE TABLE parquet_types_staging (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string,
+ t timestamp,
+ cchar char(5),
+ cvarchar varchar(10),
+ cbinary string,
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>,
+ d date
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':';
+
+CREATE TABLE parquet_types (
+ cint int,
+ ctinyint tinyint,
+ csmallint smallint,
+ cfloat float,
+ cdouble double,
+ cstring1 string,
+ t timestamp,
+ cchar char(5),
+ cvarchar varchar(10),
+ cbinary binary,
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>,
+ d date
+) STORED AS PARQUET;
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging;
+
+SELECT * FROM parquet_types_staging;
+
+INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging;
+
+-- test types in group by
+
+EXPLAIN SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ ROUND(AVG(cfloat), 5),
+ ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+;
+
+SELECT ctinyint,
+ MAX(cint),
+ MIN(csmallint),
+ COUNT(cstring1),
+ ROUND(AVG(cfloat), 5),
+ ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+;
+
+EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat;
+SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat;
+
+EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar;
+SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar;
+
+EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar;
+SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar;
+
+EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1;
+SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1;
+
+EXPLAIN SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t;
+SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t;
+
+EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary;
+SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
index 8345132..e42453d 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
@@ -150,7 +150,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: tinyint)
Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
index b49d5dd..0524cb3 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
@@ -250,19 +250,19 @@ Stage-0
limit:-1
Stage-1
Reducer 3 vectorized, llap
- File Output Operator [FS_10]
- Select Operator [SEL_9] (rows=11 width=11)
+ File Output Operator [FS_12]
+ Select Operator [SEL_11] (rows=11 width=11)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
<-Reducer 2 [SIMPLE_EDGE] llap
SHUFFLE [RS_6]
Group By Operator [GBY_4] (rows=11 width=11)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["max(VALUE._col0)","min(VALUE._col1)","count(VALUE._col2)","avg(VALUE._col3)","stddev_pop(VALUE._col4)","max(VALUE._col5)"],keys:KEY._col0
- <-Map 1 [SIMPLE_EDGE] llap
+ <-Map 1 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_3]
PartitionCols:_col0
- Group By Operator [GBY_2] (rows=22 width=11)
+ Group By Operator [GBY_10] (rows=22 width=11)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["max(cint)","min(csmallint)","count(cstring1)","avg(cfloat)","stddev_pop(cdouble)","max(cdecimal)"],keys:ctinyint
- Select Operator [SEL_1] (rows=22 width=11)
+ Select Operator [SEL_9] (rows=22 width=11)
Output:["ctinyint","cint","csmallint","cstring1","cfloat","cdouble","cdecimal"]
TableScan [TS_0] (rows=22 width=11)
default@parquet_types,parquet_types,Tbl:COMPLETE,Col:NONE,Output:["cint","ctinyint","csmallint","cfloat","cdouble","cstring1","cdecimal"]
[4/4] hive git commit: HIVE-14815: Implement Parquet vectorization
reader for Primitive types(Ferdinand Xu, review by Chao Sun) This closes #104
Posted by xu...@apache.org.
HIVE-14815: Implement Parquet vectorization reader for Primitive types(Ferdinand Xu, review by Chao Sun) This closes #104
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/936df7a1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/936df7a1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/936df7a1
Branch: refs/heads/master
Commit: 936df7a15a3ce323300cabe7b2ebb90e22f2069d
Parents: 67c022f
Author: Ferdinand Xu <ch...@intel.com>
Authored: Fri Nov 18 08:17:39 2016 +0800
Committer: Ferdinand Xu <ch...@intel.com>
Committed: Fri Nov 18 08:17:39 2016 +0800
----------------------------------------------------------------------
data/files/parquet_non_dictionary_types.txt | 300 +++
.../ql/io/parquet/MapredParquetInputFormat.java | 10 +-
.../ql/io/parquet/ParquetRecordReaderBase.java | 171 ++
.../parquet/VectorizedParquetInputFormat.java | 136 +-
.../parquet/read/DataWritableReadSupport.java | 8 +-
.../read/ParquetRecordReaderWrapper.java | 141 +-
.../ql/io/parquet/timestamp/NanoTimeUtils.java | 2 +-
.../parquet/vector/VectorizedColumnReader.java | 571 ++++
.../vector/VectorizedParquetRecordReader.java | 289 +++
.../io/parquet/TestVectorizedColumnReader.java | 429 +++
...ypes_non_dictionary_encoding_vectorization.q | 94 +
.../parquet_types_vectorization.q | 96 +
.../llap/vectorized_parquet.q.out | 2 +-
.../llap/vectorized_parquet_types.q.out | 10 +-
..._non_dictionary_encoding_vectorization.q.out | 2452 ++++++++++++++++++
.../parquet_types_vectorization.q.out | 850 ++++++
.../vectorized_parquet_types.q.out | 3 +
17 files changed, 5284 insertions(+), 280 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/data/files/parquet_non_dictionary_types.txt
----------------------------------------------------------------------
diff --git a/data/files/parquet_non_dictionary_types.txt b/data/files/parquet_non_dictionary_types.txt
new file mode 100644
index 0000000..50ea5d2
--- /dev/null
+++ b/data/files/parquet_non_dictionary_types.txt
@@ -0,0 +1,300 @@
+1000|-128|0|0.0|0.3||1940-01-01 01:01:01.111111111||||:|1000,1001|1000,b|1940-01-01|0.0
+1001|-127|1|0.3|1.3|b|1941-02-02 01:01:01.111111111|b|b|b|b:b|1001,1002|1001,c|1941-02-02|0.3
+1002|-126|2|0.6|2.3|c|1942-03-03 01:01:01.111111111|c|c|c|c:c|1002,1003|1002,d|1942-03-03|0.6
+1003|-125|3|0.9|3.3|d|1943-04-04 01:01:01.111111111|d|d|d|d:d|1003,1004|1003,e|1943-04-04|0.9
+1004|-124|4|1.2|4.3|e|1944-05-05 01:01:01.111111111|e|e|e|e:e|1004,1005|1004,f|1944-05-05|1.2
+1005|-123|5|1.5|5.3|f|1945-06-06 01:01:01.111111111|f|f|f|f:f|1005,1006|1005,g|1945-06-06|1.5
+1006|-122|6|1.8|6.3|g|1946-07-07 01:01:01.111111111|g|g|g|g:g|1006,1007|1006,h|1946-07-07|1.8
+1007|-121|7|2.1|7.3|h|1947-08-08 01:01:01.111111111|h|h|h|h:h|1007,1008|1007,i|1947-08-08|2.1
+1008|-120|8|2.4|8.3|i|1948-09-09 01:01:01.111111111|i|i|i|i:i|1008,1009|1008,j|1948-09-09|2.4
+1009|-119|9|2.7|9.3|j|1949-10-10 01:01:01.111111111|j|j|j|j:j|1009,1010|1009,k|1949-10-10|2.7
+1010|-118|10|3.0|10.3|k|1950-11-11 01:01:01.111111111|k|k|k|k:k|1010,1011|1010,l|1950-11-11|3.0
+1011|-117|11|3.3|11.3|l|1951-12-12 01:01:01.111111111|l|l|l|l:l|1011,1012|1011,m|1951-12-12|3.3
+1012|-116|12|3.6|12.3|m|1952-01-13 01:01:01.111111111|m|m|m|m:m|1012,1013|1012,n|1952-01-13|3.6
+1013|-115|13|3.9|13.3|n|1953-02-14 01:01:01.111111111|n|n|n|n:n|1013,1014|1013,o|1953-02-14|3.9
+1014|-114|14|4.2|14.3|o|1954-03-15 01:01:01.111111111|o|o|o|o:o|1014,1015|1014,p|1954-03-15|4.2
+1015|-113|15|4.5|15.3|p|1955-04-16 01:01:01.111111111|p|p|p|p:p|1015,1016|1015,q|1955-04-16|4.5
+1016|-112|16|4.8|16.3|q|1956-05-17 01:01:01.111111111|q|q|q|q:q|1016,1017|1016,r|1956-05-17|4.8
+1017|-111|17|5.1|17.3|r|1957-06-18 01:01:01.111111111|r|r|r|r:r|1017,1018|1017,s|1957-06-18|5.1
+1018|-110|18|5.4|18.3|s|1958-07-19 01:01:01.111111111|s|s|s|s:s|1018,1019|1018,t|1958-07-19|5.4
+1019|-109|19|5.7|19.3|t|1959-08-20 01:01:01.111111111|t|t|t|t:t|1019,1020|1019,u|1959-08-20|5.7
+1020|-108|20|6.0|20.3|u|1960-09-21 01:01:01.111111111|u|u|u|u:u|1020,1021|1020,v|1960-09-21|6.0
+1021|-107|21|6.3|21.3|v|1961-10-22 01:01:01.111111111|v|v|v|v:v|1021,1022|1021,w|1961-10-22|6.3
+1022|-106|22|6.6|22.3|w|1962-11-23 01:01:01.111111111|w|w|w|w:w|1022,1023|1022,x|1962-11-23|6.6
+1023|-105|23|6.9|23.3|x|1963-12-24 01:01:01.111111111|x|x|x|x:x|1023,1024|1023,y|1963-12-24|6.9
+1024|-104|24|7.2|24.3|y|1964-01-25 01:01:01.111111111|y|y|y|y:y|1024,1025|1024,z|1964-01-25|7.2
+1025|-103|25|7.5|25.3|z|1965-02-26 01:01:01.111111111|z|z|z|z:z|1025,1026|1025,ba|1965-02-26|7.5
+1026|-102|26|7.8|26.3|ba|1966-03-27 01:01:01.111111111|ba|ba|ba|ba:ba|1026,1027|1026,bb|1966-03-27|7.8
+1027|-101|27|8.1|27.3|bb|1967-04-01 01:01:01.111111111|bb|bb|bb|bb:bb|1027,1028|1027,bc|1967-04-01|8.1
+1028|-100|28|8.4|28.3|bc|1968-05-02 01:01:01.111111111|bc|bc|bc|bc:bc|1028,1029|1028,bd|1968-05-02|8.4
+1029|-99|29|8.7|29.3|bd|1969-06-03 01:01:01.111111111|bd|bd|bd|bd:bd|1029,1030|1029,be|1969-06-03|8.7
+1030|-98|30|9.0|30.3|be|1970-07-04 01:01:01.111111111|be|be|be|be:be|1030,1031|1030,bf|1970-07-04|9.0
+1031|-97|31|9.3|31.3|bf|1971-08-05 01:01:01.111111111|bf|bf|bf|bf:bf|1031,1032|1031,bg|1971-08-05|9.3
+1032|-96|32|9.6|32.3|bg|1972-09-06 01:01:01.111111111|bg|bg|bg|bg:bg|1032,1033|1032,bh|1972-09-06|9.6
+1033|-95|33|9.9|33.3|bh|1973-10-07 01:01:01.111111111|bh|bh|bh|bh:bh|1033,1034|1033,bi|1973-10-07|9.9
+1034|-94|34|10.2|34.3|bi|1974-11-08 01:01:01.111111111|bi|bi|bi|bi:bi|1034,1035|1034,bj|1974-11-08|10.2
+1035|-93|35|10.5|35.3|bj|1975-12-09 01:01:01.111111111|bj|bj|bj|bj:bj|1035,1036|1035,bk|1975-12-09|10.5
+1036|-92|36|10.8|36.3|bk|1976-01-10 01:01:01.111111111|bk|bk|bk|bk:bk|1036,1037|1036,bl|1976-01-10|10.8
+1037|-91|37|11.1|37.3|bl|1977-02-11 01:01:01.111111111|bl|bl|bl|bl:bl|1037,1038|1037,bm|1977-02-11|11.1
+1038|-90|38|11.4|38.3|bm|1978-03-12 01:01:01.111111111|bm|bm|bm|bm:bm|1038,1039|1038,bn|1978-03-12|11.4
+1039|-89|39|11.7|39.3|bn|1979-04-13 01:01:01.111111111|bn|bn|bn|bn:bn|1039,1040|1039,bo|1979-04-13|11.7
+1040|-88|40|12.0|40.3|bo|1980-05-14 01:01:01.111111111|bo|bo|bo|bo:bo|1040,1041|1040,bp|1980-05-14|12.0
+1041|-87|41|12.3|41.3|bp|1981-06-15 01:01:01.111111111|bp|bp|bp|bp:bp|1041,1042|1041,bq|1981-06-15|12.3
+1042|-86|42|12.6|42.3|bq|1982-07-16 01:01:01.111111111|bq|bq|bq|bq:bq|1042,1043|1042,br|1982-07-16|12.6
+1043|-85|43|12.9|43.3|br|1983-08-17 01:01:01.111111111|br|br|br|br:br|1043,1044|1043,bs|1983-08-17|12.9
+1044|-84|44|13.2|44.3|bs|1984-09-18 01:01:01.111111111|bs|bs|bs|bs:bs|1044,1045|1044,bt|1984-09-18|13.2
+1045|-83|45|13.5|45.3|bt|1985-10-19 01:01:01.111111111|bt|bt|bt|bt:bt|1045,1046|1045,bu|1985-10-19|13.5
+1046|-82|46|13.8|46.3|bu|1986-11-20 01:01:01.111111111|bu|bu|bu|bu:bu|1046,1047|1046,bv|1986-11-20|13.8
+1047|-81|47|14.1|47.3|bv|1987-12-21 01:01:01.111111111|bv|bv|bv|bv:bv|1047,1048|1047,bw|1987-12-21|14.1
+1048|-80|48|14.4|48.3|bw|1988-01-22 01:01:01.111111111|bw|bw|bw|bw:bw|1048,1049|1048,bx|1988-01-22|14.4
+1049|-79|49|14.7|49.3|bx|1989-02-23 01:01:01.111111111|bx|bx|bx|bx:bx|1049,1050|1049,by|1989-02-23|14.7
+1050|-78|50|15.0|50.3|by|1990-03-24 01:01:01.111111111|by|by|by|by:by|1050,1051|1050,bz|1990-03-24|15.0
+1051|-77|51|15.3|51.3|bz|1991-04-25 01:01:01.111111111|bz|bz|bz|bz:bz|1051,1052|1051,ca|1991-04-25|15.3
+1052|-76|52|15.6|52.3|ca|1992-05-26 01:01:01.111111111|ca|ca|ca|ca:ca|1052,1053|1052,cb|1992-05-26|15.6
+1053|-75|53|15.9|53.3|cb|1993-06-27 01:01:01.111111111|cb|cb|cb|cb:cb|1053,1054|1053,cc|1993-06-27|15.9
+1054|-74|54|16.2|54.3|cc|1994-07-01 01:01:01.111111111|cc|cc|cc|cc:cc|1054,1055|1054,cd|1994-07-01|16.2
+1055|-73|55|16.5|55.3|cd|1995-08-02 01:01:01.111111111|cd|cd|cd|cd:cd|1055,1056|1055,ce|1995-08-02|16.5
+1056|-72|56|16.8|56.3|ce|1996-09-03 01:01:01.111111111|ce|ce|ce|ce:ce|1056,1057|1056,cf|1996-09-03|16.8
+1057|-71|57|17.1|57.3|cf|1997-10-04 01:01:01.111111111|cf|cf|cf|cf:cf|1057,1058|1057,cg|1997-10-04|17.1
+1058|-70|58|17.4|58.3|cg|1998-11-05 01:01:01.111111111|cg|cg|cg|cg:cg|1058,1059|1058,ch|1998-11-05|17.4
+1059|-69|59|17.7|59.3|ch|1999-12-06 01:01:01.111111111|ch|ch|ch|ch:ch|1059,1060|1059,ci|1999-12-06|17.7
+1060|-68|60|18.0|60.3|ci|2000-01-07 01:01:01.111111111|ci|ci|ci|ci:ci|1060,1061|1060,cj|2000-01-07|18.0
+1061|-67|61|18.3|61.3|cj|2001-02-08 01:01:01.111111111|cj|cj|cj|cj:cj|1061,1062|1061,ck|2001-02-08|18.3
+1062|-66|62|18.6|62.3|ck|2002-03-09 01:01:01.111111111|ck|ck|ck|ck:ck|1062,1063|1062,cl|2002-03-09|18.6
+1063|-65|63|18.9|63.3|cl|2003-04-10 01:01:01.111111111|cl|cl|cl|cl:cl|1063,1064|1063,cm|2003-04-10|18.9
+1064|-64|64|19.2|64.3|cm|2004-05-11 01:01:01.111111111|cm|cm|cm|cm:cm|1064,1065|1064,cn|2004-05-11|19.2
+1065|-63|65|19.5|65.3|cn|2005-06-12 01:01:01.111111111|cn|cn|cn|cn:cn|1065,1066|1065,co|2005-06-12|19.5
+1066|-62|66|19.8|66.3|co|2006-07-13 01:01:01.111111111|co|co|co|co:co|1066,1067|1066,cp|2006-07-13|19.8
+1067|-61|67|20.1|67.3|cp|2007-08-14 01:01:01.111111111|cp|cp|cp|cp:cp|1067,1068|1067,cq|2007-08-14|20.1
+1068|-60|68|20.4|68.3|cq|2008-09-15 01:01:01.111111111|cq|cq|cq|cq:cq|1068,1069|1068,cr|2008-09-15|20.4
+1069|-59|69|20.7|69.3|cr|2009-10-16 01:01:01.111111111|cr|cr|cr|cr:cr|1069,1070|1069,cs|2009-10-16|20.7
+1070|-58|70|21.0|70.3|cs|2010-11-17 01:01:01.111111111|cs|cs|cs|cs:cs|1070,1071|1070,ct|2010-11-17|21.0
+1071|-57|71|21.3|71.3|ct|2011-12-18 01:01:01.111111111|ct|ct|ct|ct:ct|1071,1072|1071,cu|2011-12-18|21.3
+1072|-56|72|21.6|72.3|cu|2012-01-19 01:01:01.111111111|cu|cu|cu|cu:cu|1072,1073|1072,cv|2012-01-19|21.6
+1073|-55|73|21.9|73.3|cv|2013-02-20 01:01:01.111111111|cv|cv|cv|cv:cv|1073,1074|1073,cw|2013-02-20|21.9
+1074|-54|74|22.2|74.3|cw|2014-03-21 01:01:01.111111111|cw|cw|cw|cw:cw|1074,1075|1074,cx|2014-03-21|22.2
+1075|-53|75|22.5|75.3|cx|2015-04-22 01:01:01.111111111|cx|cx|cx|cx:cx|1075,1076|1075,cy|2015-04-22|22.5
+1076|-52|76|22.8|76.3|cy|2016-05-23 01:01:01.111111111|cy|cy|cy|cy:cy|1076,1077|1076,cz|2016-05-23|22.8
+1077|-51|77|23.1|77.3|cz|2017-06-24 01:01:01.111111111|cz|cz|cz|cz:cz|1077,1078|1077,da|2017-06-24|23.1
+1078|-50|78|23.4|78.3|da|2018-07-25 01:01:01.111111111|da|da|da|da:da|1078,1079|1078,db|2018-07-25|23.4
+1079|-49|79|23.7|79.3|db|2019-08-26 01:01:01.111111111|db|db|db|db:db|1079,1080|1079,dc|2019-08-26|23.7
+1080|-48|80|24.0|80.3|dc|2020-09-27 01:01:01.111111111|dc|dc|dc|dc:dc|1080,1081|1080,dd|2020-09-27|24.0
+1081|-47|81|24.3|81.3|dd|2021-10-01 01:01:01.111111111|dd|dd|dd|dd:dd|1081,1082|1081,de|2021-10-01|24.3
+1082|-46|82|24.6|82.3|de|2022-11-02 01:01:01.111111111|de|de|de|de:de|1082,1083|1082,df|2022-11-02|24.6
+1083|-45|83|24.9|83.3|df|2023-12-03 01:01:01.111111111|df|df|df|df:df|1083,1084|1083,dg|2023-12-03|24.9
+1084|-44|84|25.2|84.3|dg|2024-01-04 01:01:01.111111111|dg|dg|dg|dg:dg|1084,1085|1084,dh|2024-01-04|25.2
+1085|-43|85|25.5|85.3|dh|2025-02-05 01:01:01.111111111|dh|dh|dh|dh:dh|1085,1086|1085,di|2025-02-05|25.5
+1086|-42|86|25.8|86.3|di|2026-03-06 01:01:01.111111111|di|di|di|di:di|1086,1087|1086,dj|2026-03-06|25.8
+1087|-41|87|26.1|87.3|dj|2027-04-07 01:01:01.111111111|dj|dj|dj|dj:dj|1087,1088|1087,dk|2027-04-07|26.1
+1088|-40|88|26.4|88.3|dk|2028-05-08 01:01:01.111111111|dk|dk|dk|dk:dk|1088,1089|1088,dl|2028-05-08|26.4
+1089|-39|89|26.7|89.3|dl|2029-06-09 01:01:01.111111111|dl|dl|dl|dl:dl|1089,1090|1089,dm|2029-06-09|26.7
+1090|-38|90|27.0|90.3|dm|2030-07-10 01:01:01.111111111|dm|dm|dm|dm:dm|1090,1091|1090,dn|2030-07-10|27.0
+1091|-37|91|27.3|91.3|dn|2031-08-11 01:01:01.111111111|dn|dn|dn|dn:dn|1091,1092|1091,do|2031-08-11|27.3
+1092|-36|92|27.6|92.3|do|2032-09-12 01:01:01.111111111|do|do|do|do:do|1092,1093|1092,dp|2032-09-12|27.6
+1093|-35|93|27.9|93.3|dp|2033-10-13 01:01:01.111111111|dp|dp|dp|dp:dp|1093,1094|1093,dq|2033-10-13|27.9
+1094|-34|94|28.2|94.3|dq|2034-11-14 01:01:01.111111111|dq|dq|dq|dq:dq|1094,1095|1094,dr|2034-11-14|28.2
+1095|-33|95|28.5|95.3|dr|2035-12-15 01:01:01.111111111|dr|dr|dr|dr:dr|1095,1096|1095,ds|2035-12-15|28.5
+1096|-32|96|28.8|96.3|ds|2036-01-16 01:01:01.111111111|ds|ds|ds|ds:ds|1096,1097|1096,dt|2036-01-16|28.8
+1097|-31|97|29.1|97.3|dt|2037-02-17 01:01:01.111111111|dt|dt|dt|dt:dt|1097,1098|1097,du|2037-02-17|29.1
+1098|-30|98|29.4|98.3|du|2038-03-18 01:01:01.111111111|du|du|du|du:du|1098,1099|1098,dv|2038-03-18|29.4
+1099|-29|99|29.7|99.3|dv|2039-04-19 01:01:01.111111111|dv|dv|dv|dv:dv|1099,1100|1099,dw|2039-04-19|29.7
+1100|-28|100|30.0|100.3|dw|2040-05-20 01:01:01.111111111|dw|dw|dw|dw:dw|1100,1101|1100,dx|2040-05-20|30.0
+1101|-27|101|30.3|101.3|dx|2041-06-21 01:01:01.111111111|dx|dx|dx|dx:dx|1101,1102|1101,dy|2041-06-21|30.3
+1102|-26|102|30.6|102.3|dy|2042-07-22 01:01:01.111111111|dy|dy|dy|dy:dy|1102,1103|1102,dz|2042-07-22|30.6
+1103|-25|103|30.9|103.3|dz|2043-08-23 01:01:01.111111111|dz|dz|dz|dz:dz|1103,1104|1103,ea|2043-08-23|30.9
+1104|-24|104|31.2|104.3|ea|2044-09-24 01:01:01.111111111|ea|ea|ea|ea:ea|1104,1105|1104,eb|2044-09-24|31.2
+1105|-23|105|31.5|105.3|eb|2045-10-25 01:01:01.111111111|eb|eb|eb|eb:eb|1105,1106|1105,ec|2045-10-25|31.5
+1106|-22|106|31.8|106.3|ec|2046-11-26 01:01:01.111111111|ec|ec|ec|ec:ec|1106,1107|1106,ed|2046-11-26|31.8
+1107|-21|107|32.1|107.3|ed|2047-12-27 01:01:01.111111111|ed|ed|ed|ed:ed|1107,1108|1107,ee|2047-12-27|32.1
+1108|-20|108|32.4|108.3|ee|2048-01-01 01:01:01.111111111|ee|ee|ee|ee:ee|1108,1109|1108,ef|2048-01-01|32.4
+1109|-19|109|32.7|109.3|ef|2049-02-02 01:01:01.111111111|ef|ef|ef|ef:ef|1109,1110|1109,eg|2049-02-02|32.7
+1110|-18|110|33.0|110.3|eg|2050-03-03 01:01:01.111111111|eg|eg|eg|eg:eg|1110,1111|1110,eh|2050-03-03|33.0
+1111|-17|111|33.3|111.3|eh|2051-04-04 01:01:01.111111111|eh|eh|eh|eh:eh|1111,1112|1111,ei|2051-04-04|33.3
+1112|-16|112|33.6|112.3|ei|2052-05-05 01:01:01.111111111|ei|ei|ei|ei:ei|1112,1113|1112,ej|2052-05-05|33.6
+1113|-15|113|33.9|113.3|ej|2053-06-06 01:01:01.111111111|ej|ej|ej|ej:ej|1113,1114|1113,ek|2053-06-06|33.9
+1114|-14|114|34.2|114.3|ek|2054-07-07 01:01:01.111111111|ek|ek|ek|ek:ek|1114,1115|1114,el|2054-07-07|34.2
+1115|-13|115|34.5|115.3|el|2055-08-08 01:01:01.111111111|el|el|el|el:el|1115,1116|1115,em|2055-08-08|34.5
+1116|-12|116|34.8|116.3|em|2056-09-09 01:01:01.111111111|em|em|em|em:em|1116,1117|1116,en|2056-09-09|34.8
+1117|-11|117|35.1|117.3|en|2057-10-10 01:01:01.111111111|en|en|en|en:en|1117,1118|1117,eo|2057-10-10|35.1
+1118|-10|118|35.4|118.3|eo|2058-11-11 01:01:01.111111111|eo|eo|eo|eo:eo|1118,1119|1118,ep|2058-11-11|35.4
+1119|-9|119|35.7|119.3|ep|2059-12-12 01:01:01.111111111|ep|ep|ep|ep:ep|1119,1120|1119,eq|2059-12-12|35.7
+1120|-8|120|36.0|120.3|eq|2060-01-13 01:01:01.111111111|eq|eq|eq|eq:eq|1120,1121|1120,er|2060-01-13|36.0
+1121|-7|121|36.3|121.3|er|2061-02-14 01:01:01.111111111|er|er|er|er:er|1121,1122|1121,es|2061-02-14|36.3
+1122|-6|122|36.6|122.3|es|2062-03-15 01:01:01.111111111|es|es|es|es:es|1122,1123|1122,et|2062-03-15|36.6
+1123|-5|123|36.9|123.3|et|2063-04-16 01:01:01.111111111|et|et|et|et:et|1123,1124|1123,eu|2063-04-16|36.9
+1124|-4|124|37.2|124.3|eu|2064-05-17 01:01:01.111111111|eu|eu|eu|eu:eu|1124,1125|1124,ev|2064-05-17|37.2
+1125|-3|125|37.5|125.3|ev|2065-06-18 01:01:01.111111111|ev|ev|ev|ev:ev|1125,1126|1125,ew|2065-06-18|37.5
+1126|-2|126|37.8|126.3|ew|2066-07-19 01:01:01.111111111|ew|ew|ew|ew:ew|1126,1127|1126,ex|2066-07-19|37.8
+1127|-1|127|38.1|127.3|ex|2067-08-20 01:01:01.111111111|ex|ex|ex|ex:ex|1127,1128|1127,ey|2067-08-20|38.1
+1128|0|128|38.4|128.3|ey|2068-09-21 01:01:01.111111111|ey|ey|ey|ey:ey|1128,1129|1128,ez|2068-09-21|38.4
+1129|1|129|38.7|129.3|ez|2069-10-22 01:01:01.111111111|ez|ez|ez|ez:ez|1129,1130|1129,fa|2069-10-22|38.7
+1130|2|130|39.0|130.3|fa|2070-11-23 01:01:01.111111111|fa|fa|fa|fa:fa|1130,1131|1130,fb|2070-11-23|39.0
+1131|3|131|39.3|131.3|fb|2071-12-24 01:01:01.111111111|fb|fb|fb|fb:fb|1131,1132|1131,fc|2071-12-24|39.3
+1132|4|132|39.6|132.3|fc|2072-01-25 01:01:01.111111111|fc|fc|fc|fc:fc|1132,1133|1132,fd|2072-01-25|39.6
+1133|5|133|39.9|133.3|fd|2073-02-26 01:01:01.111111111|fd|fd|fd|fd:fd|1133,1134|1133,fe|2073-02-26|39.9
+1134|6|134|40.2|134.3|fe|2074-03-27 01:01:01.111111111|fe|fe|fe|fe:fe|1134,1135|1134,ff|2074-03-27|40.2
+1135|7|135|40.5|135.3|ff|2075-04-01 01:01:01.111111111|ff|ff|ff|ff:ff|1135,1136|1135,fg|2075-04-01|40.5
+1136|8|136|40.8|136.3|fg|2076-05-02 01:01:01.111111111|fg|fg|fg|fg:fg|1136,1137|1136,fh|2076-05-02|40.8
+1137|9|137|41.1|137.3|fh|2077-06-03 01:01:01.111111111|fh|fh|fh|fh:fh|1137,1138|1137,fi|2077-06-03|41.1
+1138|10|138|41.4|138.3|fi|2078-07-04 01:01:01.111111111|fi|fi|fi|fi:fi|1138,1139|1138,fj|2078-07-04|41.4
+1139|11|139|41.7|139.3|fj|2079-08-05 01:01:01.111111111|fj|fj|fj|fj:fj|1139,1140|1139,fk|2079-08-05|41.7
+1140|12|140|42.0|140.3|fk|2080-09-06 01:01:01.111111111|fk|fk|fk|fk:fk|1140,1141|1140,fl|2080-09-06|42.0
+1141|13|141|42.3|141.3|fl|2081-10-07 01:01:01.111111111|fl|fl|fl|fl:fl|1141,1142|1141,fm|2081-10-07|42.3
+1142|14|142|42.6|142.3|fm|2082-11-08 01:01:01.111111111|fm|fm|fm|fm:fm|1142,1143|1142,fn|2082-11-08|42.6
+1143|15|143|42.9|143.3|fn|2083-12-09 01:01:01.111111111|fn|fn|fn|fn:fn|1143,1144|1143,fo|2083-12-09|42.9
+1144|16|144|43.2|144.3|fo|2084-01-10 01:01:01.111111111|fo|fo|fo|fo:fo|1144,1145|1144,fp|2084-01-10|43.2
+1145|17|145|43.5|145.3|fp|2085-02-11 01:01:01.111111111|fp|fp|fp|fp:fp|1145,1146|1145,fq|2085-02-11|43.5
+1146|18|146|43.8|146.3|fq|2086-03-12 01:01:01.111111111|fq|fq|fq|fq:fq|1146,1147|1146,fr|2086-03-12|43.8
+1147|19|147|44.1|147.3|fr|2087-04-13 01:01:01.111111111|fr|fr|fr|fr:fr|1147,1148|1147,fs|2087-04-13|44.1
+1148|20|148|44.4|148.3|fs|2088-05-14 01:01:01.111111111|fs|fs|fs|fs:fs|1148,1149|1148,ft|2088-05-14|44.4
+1149|21|149|44.7|149.3|ft|2089-06-15 01:01:01.111111111|ft|ft|ft|ft:ft|1149,1150|1149,fu|2089-06-15|44.7
+1150|22|150|45.0|150.3|fu|2090-07-16 01:01:01.111111111|fu|fu|fu|fu:fu|1150,1151|1150,fv|2090-07-16|45.0
+1151|23|151|45.3|151.3|fv|2091-08-17 01:01:01.111111111|fv|fv|fv|fv:fv|1151,1152|1151,fw|2091-08-17|45.3
+1152|24|152|45.6|152.3|fw|2092-09-18 01:01:01.111111111|fw|fw|fw|fw:fw|1152,1153|1152,fx|2092-09-18|45.6
+1153|25|153|45.9|153.3|fx|2093-10-19 01:01:01.111111111|fx|fx|fx|fx:fx|1153,1154|1153,fy|2093-10-19|45.9
+1154|26|154|46.2|154.3|fy|2094-11-20 01:01:01.111111111|fy|fy|fy|fy:fy|1154,1155|1154,fz|2094-11-20|46.2
+1155|27|155|46.5|155.3|fz|2095-12-21 01:01:01.111111111|fz|fz|fz|fz:fz|1155,1156|1155,ga|2095-12-21|46.5
+1156|28|156|46.8|156.3|ga|2096-01-22 01:01:01.111111111|ga|ga|ga|ga:ga|1156,1157|1156,gb|2096-01-22|46.8
+1157|29|157|47.1|157.3|gb|2097-02-23 01:01:01.111111111|gb|gb|gb|gb:gb|1157,1158|1157,gc|2097-02-23|47.1
+1158|30|158|47.4|158.3|gc|2098-03-24 01:01:01.111111111|gc|gc|gc|gc:gc|1158,1159|1158,gd|2098-03-24|47.4
+1159|31|159|47.7|159.3|gd|2099-04-25 01:01:01.111111111|gd|gd|gd|gd:gd|1159,1160|1159,ge|2099-04-25|47.7
+1160|32|160|48.0|160.3|ge|2100-05-26 01:01:01.111111111|ge|ge|ge|ge:ge|1160,1161|1160,gf|2100-05-26|48.0
+1161|33|161|48.3|161.3|gf|2101-06-27 01:01:01.111111111|gf|gf|gf|gf:gf|1161,1162|1161,gg|2101-06-27|48.3
+1162|34|162|48.6|162.3|gg|2102-07-01 01:01:01.111111111|gg|gg|gg|gg:gg|1162,1163|1162,gh|2102-07-01|48.6
+1163|35|163|48.9|163.3|gh|2103-08-02 01:01:01.111111111|gh|gh|gh|gh:gh|1163,1164|1163,gi|2103-08-02|48.9
+1164|36|164|49.2|164.3|gi|2104-09-03 01:01:01.111111111|gi|gi|gi|gi:gi|1164,1165|1164,gj|2104-09-03|49.2
+1165|37|165|49.5|165.3|gj|2105-10-04 01:01:01.111111111|gj|gj|gj|gj:gj|1165,1166|1165,gk|2105-10-04|49.5
+1166|38|166|49.8|166.3|gk|2106-11-05 01:01:01.111111111|gk|gk|gk|gk:gk|1166,1167|1166,gl|2106-11-05|49.8
+1167|39|167|50.1|167.3|gl|2107-12-06 01:01:01.111111111|gl|gl|gl|gl:gl|1167,1168|1167,gm|2107-12-06|50.1
+1168|40|168|50.4|168.3|gm|2108-01-07 01:01:01.111111111|gm|gm|gm|gm:gm|1168,1169|1168,gn|2108-01-07|50.4
+1169|41|169|50.7|169.3|gn|2109-02-08 01:01:01.111111111|gn|gn|gn|gn:gn|1169,1170|1169,go|2109-02-08|50.7
+1170|42|170|51.0|170.3|go|2110-03-09 01:01:01.111111111|go|go|go|go:go|1170,1171|1170,gp|2110-03-09|51.0
+1171|43|171|51.3|171.3|gp|2111-04-10 01:01:01.111111111|gp|gp|gp|gp:gp|1171,1172|1171,gq|2111-04-10|51.3
+1172|44|172|51.6|172.3|gq|2112-05-11 01:01:01.111111111|gq|gq|gq|gq:gq|1172,1173|1172,gr|2112-05-11|51.6
+1173|45|173|51.9|173.3|gr|2113-06-12 01:01:01.111111111|gr|gr|gr|gr:gr|1173,1174|1173,gs|2113-06-12|51.9
+1174|46|174|52.2|174.3|gs|2114-07-13 01:01:01.111111111|gs|gs|gs|gs:gs|1174,1175|1174,gt|2114-07-13|52.2
+1175|47|175|52.5|175.3|gt|2115-08-14 01:01:01.111111111|gt|gt|gt|gt:gt|1175,1176|1175,gu|2115-08-14|52.5
+1176|48|176|52.8|176.3|gu|2116-09-15 01:01:01.111111111|gu|gu|gu|gu:gu|1176,1177|1176,gv|2116-09-15|52.8
+1177|49|177|53.1|177.3|gv|2117-10-16 01:01:01.111111111|gv|gv|gv|gv:gv|1177,1178|1177,gw|2117-10-16|53.1
+1178|50|178|53.4|178.3|gw|2118-11-17 01:01:01.111111111|gw|gw|gw|gw:gw|1178,1179|1178,gx|2118-11-17|53.4
+1179|51|179|53.7|179.3|gx|2119-12-18 01:01:01.111111111|gx|gx|gx|gx:gx|1179,1180|1179,gy|2119-12-18|53.7
+1180|52|180|54.0|180.3|gy|2120-01-19 01:01:01.111111111|gy|gy|gy|gy:gy|1180,1181|1180,gz|2120-01-19|54.0
+1181|53|181|54.3|181.3|gz|2121-02-20 01:01:01.111111111|gz|gz|gz|gz:gz|1181,1182|1181,ha|2121-02-20|54.3
+1182|54|182|54.6|182.3|ha|2122-03-21 01:01:01.111111111|ha|ha|ha|ha:ha|1182,1183|1182,hb|2122-03-21|54.6
+1183|55|183|54.9|183.3|hb|2123-04-22 01:01:01.111111111|hb|hb|hb|hb:hb|1183,1184|1183,hc|2123-04-22|54.9
+1184|56|184|55.2|184.3|hc|2124-05-23 01:01:01.111111111|hc|hc|hc|hc:hc|1184,1185|1184,hd|2124-05-23|55.2
+1185|57|185|55.5|185.3|hd|2125-06-24 01:01:01.111111111|hd|hd|hd|hd:hd|1185,1186|1185,he|2125-06-24|55.5
+1186|58|186|55.8|186.3|he|2126-07-25 01:01:01.111111111|he|he|he|he:he|1186,1187|1186,hf|2126-07-25|55.8
+1187|59|187|56.1|187.3|hf|2127-08-26 01:01:01.111111111|hf|hf|hf|hf:hf|1187,1188|1187,hg|2127-08-26|56.1
+1188|60|188|56.4|188.3|hg|2128-09-27 01:01:01.111111111|hg|hg|hg|hg:hg|1188,1189|1188,hh|2128-09-27|56.4
+1189|61|189|56.7|189.3|hh|2129-10-01 01:01:01.111111111|hh|hh|hh|hh:hh|1189,1190|1189,hi|2129-10-01|56.7
+1190|62|190|57.0|190.3|hi|2130-11-02 01:01:01.111111111|hi|hi|hi|hi:hi|1190,1191|1190,hj|2130-11-02|57.0
+1191|63|191|57.3|191.3|hj|2131-12-03 01:01:01.111111111|hj|hj|hj|hj:hj|1191,1192|1191,hk|2131-12-03|57.3
+1192|64|192|57.6|192.3|hk|2132-01-04 01:01:01.111111111|hk|hk|hk|hk:hk|1192,1193|1192,hl|2132-01-04|57.6
+1193|65|193|57.9|193.3|hl|2133-02-05 01:01:01.111111111|hl|hl|hl|hl:hl|1193,1194|1193,hm|2133-02-05|57.9
+1194|66|194|58.2|194.3|hm|2134-03-06 01:01:01.111111111|hm|hm|hm|hm:hm|1194,1195|1194,hn|2134-03-06|58.2
+1195|67|195|58.5|195.3|hn|2135-04-07 01:01:01.111111111|hn|hn|hn|hn:hn|1195,1196|1195,ho|2135-04-07|58.5
+1196|68|196|58.8|196.3|ho|2136-05-08 01:01:01.111111111|ho|ho|ho|ho:ho|1196,1197|1196,hp|2136-05-08|58.8
+1197|69|197|59.1|197.3|hp|2137-06-09 01:01:01.111111111|hp|hp|hp|hp:hp|1197,1198|1197,hq|2137-06-09|59.1
+1198|70|198|59.4|198.3|hq|2138-07-10 01:01:01.111111111|hq|hq|hq|hq:hq|1198,1199|1198,hr|2138-07-10|59.4
+1199|71|199|59.7|199.3|hr|2139-08-11 01:01:01.111111111|hr|hr|hr|hr:hr|1199,1200|1199,hs|2139-08-11|59.7
+1200|72|200|60.0|200.3|hs|2140-09-12 01:01:01.111111111|hs|hs|hs|hs:hs|1200,1201|1200,ht|2140-09-12|60.0
+1201|73|201|60.3|201.3|ht|2141-10-13 01:01:01.111111111|ht|ht|ht|ht:ht|1201,1202|1201,hu|2141-10-13|60.3
+1202|74|202|60.6|202.3|hu|2142-11-14 01:01:01.111111111|hu|hu|hu|hu:hu|1202,1203|1202,hv|2142-11-14|60.6
+1203|75|203|60.9|203.3|hv|2143-12-15 01:01:01.111111111|hv|hv|hv|hv:hv|1203,1204|1203,hw|2143-12-15|60.9
+1204|76|204|61.2|204.3|hw|2144-01-16 01:01:01.111111111|hw|hw|hw|hw:hw|1204,1205|1204,hx|2144-01-16|61.2
+1205|77|205|61.5|205.3|hx|2145-02-17 01:01:01.111111111|hx|hx|hx|hx:hx|1205,1206|1205,hy|2145-02-17|61.5
+1206|78|206|61.8|206.3|hy|2146-03-18 01:01:01.111111111|hy|hy|hy|hy:hy|1206,1207|1206,hz|2146-03-18|61.8
+1207|79|207|62.1|207.3|hz|2147-04-19 01:01:01.111111111|hz|hz|hz|hz:hz|1207,1208|1207,ia|2147-04-19|62.1
+1208|80|208|62.4|208.3|ia|2148-05-20 01:01:01.111111111|ia|ia|ia|ia:ia|1208,1209|1208,ib|2148-05-20|62.4
+1209|81|209|62.7|209.3|ib|2149-06-21 01:01:01.111111111|ib|ib|ib|ib:ib|1209,1210|1209,ic|2149-06-21|62.7
+1210|82|210|63.0|210.3|ic|2150-07-22 01:01:01.111111111|ic|ic|ic|ic:ic|1210,1211|1210,id|2150-07-22|63.0
+1211|83|211|63.3|211.3|id|2151-08-23 01:01:01.111111111|id|id|id|id:id|1211,1212|1211,ie|2151-08-23|63.3
+1212|84|212|63.6|212.3|ie|2152-09-24 01:01:01.111111111|ie|ie|ie|ie:ie|1212,1213|1212,if|2152-09-24|63.6
+1213|85|213|63.9|213.3|if|2153-10-25 01:01:01.111111111|if|if|if|if:if|1213,1214|1213,ig|2153-10-25|63.9
+1214|86|214|64.2|214.3|ig|2154-11-26 01:01:01.111111111|ig|ig|ig|ig:ig|1214,1215|1214,ih|2154-11-26|64.2
+1215|87|215|64.5|215.3|ih|2155-12-27 01:01:01.111111111|ih|ih|ih|ih:ih|1215,1216|1215,ii|2155-12-27|64.5
+1216|88|216|64.8|216.3|ii|2156-01-01 01:01:01.111111111|ii|ii|ii|ii:ii|1216,1217|1216,ij|2156-01-01|64.8
+1217|89|217|65.1|217.3|ij|2157-02-02 01:01:01.111111111|ij|ij|ij|ij:ij|1217,1218|1217,ik|2157-02-02|65.1
+1218|90|218|65.4|218.3|ik|2158-03-03 01:01:01.111111111|ik|ik|ik|ik:ik|1218,1219|1218,il|2158-03-03|65.4
+1219|91|219|65.7|219.3|il|2159-04-04 01:01:01.111111111|il|il|il|il:il|1219,1220|1219,im|2159-04-04|65.7
+1220|92|220|66.0|220.3|im|2160-05-05 01:01:01.111111111|im|im|im|im:im|1220,1221|1220,in|2160-05-05|66.0
+1221|93|221|66.3|221.3|in|2161-06-06 01:01:01.111111111|in|in|in|in:in|1221,1222|1221,io|2161-06-06|66.3
+1222|94|222|66.6|222.3|io|2162-07-07 01:01:01.111111111|io|io|io|io:io|1222,1223|1222,ip|2162-07-07|66.6
+1223|95|223|66.9|223.3|ip|2163-08-08 01:01:01.111111111|ip|ip|ip|ip:ip|1223,1224|1223,iq|2163-08-08|66.9
+1224|96|224|67.2|224.3|iq|2164-09-09 01:01:01.111111111|iq|iq|iq|iq:iq|1224,1225|1224,ir|2164-09-09|67.2
+1225|97|225|67.5|225.3|ir|2165-10-10 01:01:01.111111111|ir|ir|ir|ir:ir|1225,1226|1225,is|2165-10-10|67.5
+1226|98|226|67.8|226.3|is|2166-11-11 01:01:01.111111111|is|is|is|is:is|1226,1227|1226,it|2166-11-11|67.8
+1227|99|227|68.1|227.3|it|2167-12-12 01:01:01.111111111|it|it|it|it:it|1227,1228|1227,iu|2167-12-12|68.1
+1228|100|228|68.4|228.3|iu|2168-01-13 01:01:01.111111111|iu|iu|iu|iu:iu|1228,1229|1228,iv|2168-01-13|68.4
+1229|101|229|68.7|229.3|iv|2169-02-14 01:01:01.111111111|iv|iv|iv|iv:iv|1229,1230|1229,iw|2169-02-14|68.7
+1230|102|230|69.0|230.3|iw|2170-03-15 01:01:01.111111111|iw|iw|iw|iw:iw|1230,1231|1230,ix|2170-03-15|69.0
+1231|103|231|69.3|231.3|ix|2171-04-16 01:01:01.111111111|ix|ix|ix|ix:ix|1231,1232|1231,iy|2171-04-16|69.3
+1232|104|232|69.6|232.3|iy|2172-05-17 01:01:01.111111111|iy|iy|iy|iy:iy|1232,1233|1232,iz|2172-05-17|69.6
+1233|105|233|69.9|233.3|iz|2173-06-18 01:01:01.111111111|iz|iz|iz|iz:iz|1233,1234|1233,ja|2173-06-18|69.9
+1234|106|234|70.2|234.3|ja|2174-07-19 01:01:01.111111111|ja|ja|ja|ja:ja|1234,1235|1234,jb|2174-07-19|70.2
+1235|107|235|70.5|235.3|jb|2175-08-20 01:01:01.111111111|jb|jb|jb|jb:jb|1235,1236|1235,jc|2175-08-20|70.5
+1236|108|236|70.8|236.3|jc|2176-09-21 01:01:01.111111111|jc|jc|jc|jc:jc|1236,1237|1236,jd|2176-09-21|70.8
+1237|109|237|71.1|237.3|jd|2177-10-22 01:01:01.111111111|jd|jd|jd|jd:jd|1237,1238|1237,je|2177-10-22|71.1
+1238|110|238|71.4|238.3|je|2178-11-23 01:01:01.111111111|je|je|je|je:je|1238,1239|1238,jf|2178-11-23|71.4
+1239|111|239|71.7|239.3|jf|2179-12-24 01:01:01.111111111|jf|jf|jf|jf:jf|1239,1240|1239,jg|2179-12-24|71.7
+1240|112|240|72.0|240.3|jg|2180-01-25 01:01:01.111111111|jg|jg|jg|jg:jg|1240,1241|1240,jh|2180-01-25|72.0
+1241|113|241|72.3|241.3|jh|2181-02-26 01:01:01.111111111|jh|jh|jh|jh:jh|1241,1242|1241,ji|2181-02-26|72.3
+1242|114|242|72.6|242.3|ji|2182-03-27 01:01:01.111111111|ji|ji|ji|ji:ji|1242,1243|1242,jj|2182-03-27|72.6
+1243|115|243|72.9|243.3|jj|2183-04-01 01:01:01.111111111|jj|jj|jj|jj:jj|1243,1244|1243,jk|2183-04-01|72.9
+1244|116|244|73.2|244.3|jk|2184-05-02 01:01:01.111111111|jk|jk|jk|jk:jk|1244,1245|1244,jl|2184-05-02|73.2
+1245|117|245|73.5|245.3|jl|2185-06-03 01:01:01.111111111|jl|jl|jl|jl:jl|1245,1246|1245,jm|2185-06-03|73.5
+1246|118|246|73.8|246.3|jm|2186-07-04 01:01:01.111111111|jm|jm|jm|jm:jm|1246,1247|1246,jn|2186-07-04|73.8
+1247|119|247|74.1|247.3|jn|2187-08-05 01:01:01.111111111|jn|jn|jn|jn:jn|1247,1248|1247,jo|2187-08-05|74.1
+1248|120|248|74.4|248.3|jo|2188-09-06 01:01:01.111111111|jo|jo|jo|jo:jo|1248,1249|1248,jp|2188-09-06|74.4
+1249|121|249|74.7|249.3|jp|2189-10-07 01:01:01.111111111|jp|jp|jp|jp:jp|1249,1250|1249,jq|2189-10-07|74.7
+1250|122|250|75.0|250.3|jq|2190-11-08 01:01:01.111111111|jq|jq|jq|jq:jq|1250,1251|1250,jr|2190-11-08|75.0
+1251|123|251|75.3|251.3|jr|2191-12-09 01:01:01.111111111|jr|jr|jr|jr:jr|1251,1252|1251,js|2191-12-09|75.3
+1252|124|252|75.6|252.3|js|2192-01-10 01:01:01.111111111|js|js|js|js:js|1252,1253|1252,jt|2192-01-10|75.6
+1253|125|253|75.9|253.3|jt|2193-02-11 01:01:01.111111111|jt|jt|jt|jt:jt|1253,1254|1253,ju|2193-02-11|75.9
+1254|126|254|76.2|254.3|ju|2194-03-12 01:01:01.111111111|ju|ju|ju|ju:ju|1254,1255|1254,jv|2194-03-12|76.2
+1255|127|255|76.5|255.3|jv|2195-04-13 01:01:01.111111111|jv|jv|jv|jv:jv|1255,1256|1255,jw|2195-04-13|76.5
+1256|-128|256|76.8|256.3|jw|2196-05-14 01:01:01.111111111|jw|jw|jw|jw:jw|1256,1257|1256,jx|2196-05-14|76.8
+1257|-127|257|77.1|257.3|jx|2197-06-15 01:01:01.111111111|jx|jx|jx|jx:jx|1257,1258|1257,jy|2197-06-15|77.1
+1258|-126|258|77.4|258.3|jy|2198-07-16 01:01:01.111111111|jy|jy|jy|jy:jy|1258,1259|1258,jz|2198-07-16|77.4
+1259|-125|259|77.7|259.3|jz|2199-08-17 01:01:01.111111111|jz|jz|jz|jz:jz|1259,1260|1259,ka|2199-08-17|77.7
+1260|-124|260|78.0|260.3|ka|2200-09-18 01:01:01.111111111|ka|ka|ka|ka:ka|1260,1261|1260,kb|2200-09-18|78.0
+1261|-123|261|78.3|261.3|kb|2201-10-19 01:01:01.111111111|kb|kb|kb|kb:kb|1261,1262|1261,kc|2201-10-19|78.3
+1262|-122|262|78.6|262.3|kc|2202-11-20 01:01:01.111111111|kc|kc|kc|kc:kc|1262,1263|1262,kd|2202-11-20|78.6
+1263|-121|263|78.9|263.3|kd|2203-12-21 01:01:01.111111111|kd|kd|kd|kd:kd|1263,1264|1263,ke|2203-12-21|78.9
+1264|-120|264|79.2|264.3|ke|2204-01-22 01:01:01.111111111|ke|ke|ke|ke:ke|1264,1265|1264,kf|2204-01-22|79.2
+1265|-119|265|79.5|265.3|kf|2205-02-23 01:01:01.111111111|kf|kf|kf|kf:kf|1265,1266|1265,kg|2205-02-23|79.5
+1266|-118|266|79.8|266.3|kg|2206-03-24 01:01:01.111111111|kg|kg|kg|kg:kg|1266,1267|1266,kh|2206-03-24|79.8
+1267|-117|267|80.1|267.3|kh|2207-04-25 01:01:01.111111111|kh|kh|kh|kh:kh|1267,1268|1267,ki|2207-04-25|80.1
+1268|-116|268|80.4|268.3|ki|2208-05-26 01:01:01.111111111|ki|ki|ki|ki:ki|1268,1269|1268,kj|2208-05-26|80.4
+1269|-115|269|80.7|269.3|kj|2209-06-27 01:01:01.111111111|kj|kj|kj|kj:kj|1269,1270|1269,kk|2209-06-27|80.7
+1270|-114|270|81.0|270.3|kk|2210-07-01 01:01:01.111111111|kk|kk|kk|kk:kk|1270,1271|1270,kl|2210-07-01|81.0
+1271|-113|271|81.3|271.3|kl|2211-08-02 01:01:01.111111111|kl|kl|kl|kl:kl|1271,1272|1271,km|2211-08-02|81.3
+1272|-112|272|81.6|272.3|km|2212-09-03 01:01:01.111111111|km|km|km|km:km|1272,1273|1272,kn|2212-09-03|81.6
+1273|-111|273|81.9|273.3|kn|2213-10-04 01:01:01.111111111|kn|kn|kn|kn:kn|1273,1274|1273,ko|2213-10-04|81.9
+1274|-110|274|82.2|274.3|ko|2214-11-05 01:01:01.111111111|ko|ko|ko|ko:ko|1274,1275|1274,kp|2214-11-05|82.2
+1275|-109|275|82.5|275.3|kp|2215-12-06 01:01:01.111111111|kp|kp|kp|kp:kp|1275,1276|1275,kq|2215-12-06|82.5
+1276|-108|276|82.8|276.3|kq|2216-01-07 01:01:01.111111111|kq|kq|kq|kq:kq|1276,1277|1276,kr|2216-01-07|82.8
+1277|-107|277|83.1|277.3|kr|2217-02-08 01:01:01.111111111|kr|kr|kr|kr:kr|1277,1278|1277,ks|2217-02-08|83.1
+1278|-106|278|83.4|278.3|ks|2218-03-09 01:01:01.111111111|ks|ks|ks|ks:ks|1278,1279|1278,kt|2218-03-09|83.4
+1279|-105|279|83.7|279.3|kt|2219-04-10 01:01:01.111111111|kt|kt|kt|kt:kt|1279,1280|1279,ku|2219-04-10|83.7
+1280|-104|280|84.0|280.3|ku|2220-05-11 01:01:01.111111111|ku|ku|ku|ku:ku|1280,1281|1280,kv|2220-05-11|84.0
+1281|-103|281|84.3|281.3|kv|2221-06-12 01:01:01.111111111|kv|kv|kv|kv:kv|1281,1282|1281,kw|2221-06-12|84.3
+1282|-102|282|84.6|282.3|kw|2222-07-13 01:01:01.111111111|kw|kw|kw|kw:kw|1282,1283|1282,kx|2222-07-13|84.6
+1283|-101|283|84.9|283.3|kx|2223-08-14 01:01:01.111111111|kx|kx|kx|kx:kx|1283,1284|1283,ky|2223-08-14|84.9
+1284|-100|284|85.2|284.3|ky|2224-09-15 01:01:01.111111111|ky|ky|ky|ky:ky|1284,1285|1284,kz|2224-09-15|85.2
+1285|-99|285|85.5|285.3|kz|2225-10-16 01:01:01.111111111|kz|kz|kz|kz:kz|1285,1286|1285,la|2225-10-16|85.5
+1286|-98|286|85.8|286.3|la|2226-11-17 01:01:01.111111111|la|la|la|la:la|1286,1287|1286,lb|2226-11-17|85.8
+1287|-97|287|86.1|287.3|lb|2227-12-18 01:01:01.111111111|lb|lb|lb|lb:lb|1287,1288|1287,lc|2227-12-18|86.1
+1288|-96|288|86.4|288.3|lc|2228-01-19 01:01:01.111111111|lc|lc|lc|lc:lc|1288,1289|1288,ld|2228-01-19|86.4
+1289|-95|289|86.7|289.3|ld|2229-02-20 01:01:01.111111111|ld|ld|ld|ld:ld|1289,1290|1289,le|2229-02-20|86.7
+1290|-94|290|87.0|290.3|le|2230-03-21 01:01:01.111111111|le|le|le|le:le|1290,1291|1290,lf|2230-03-21|87.0
+1291|-93|291|87.3|291.3|lf|2231-04-22 01:01:01.111111111|lf|lf|lf|lf:lf|1291,1292|1291,lg|2231-04-22|87.3
+1292|-92|292|87.6|292.3|lg|2232-05-23 01:01:01.111111111|lg|lg|lg|lg:lg|1292,1293|1292,lh|2232-05-23|87.6
+1293|-91|293|87.9|293.3|lh|2233-06-24 01:01:01.111111111|lh|lh|lh|lh:lh|1293,1294|1293,li|2233-06-24|87.9
+1294|-90|294|88.2|294.3|li|2234-07-25 01:01:01.111111111|li|li|li|li:li|1294,1295|1294,lj|2234-07-25|88.2
+1295|-89|295|88.5|295.3|lj|2235-08-26 01:01:01.111111111|lj|lj|lj|lj:lj|1295,1296|1295,lk|2235-08-26|88.5
+1296|-88|296|88.8|296.3|lk|2236-09-27 01:01:01.111111111|lk|lk|lk|lk:lk|1296,1297|1296,ll|2236-09-27|88.8
+1297|-87|297|89.1|297.3|ll|2237-10-01 01:01:01.111111111|ll|ll|ll|ll:ll|1297,1298|1297,lm|2237-10-01|89.1
+1298|-86|298|89.4|298.3|lm|2238-11-02 01:01:01.111111111|lm|lm|lm|lm:lm|1298,1299|1298,ln|2238-11-02|89.4
+1299|-85|299|89.7|299.3|ln|2239-12-03 01:01:01.111111111|ln|ln|ln|ln:ln|1299,1300|1299,lo|2239-12-03|89.7
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
index 5b65e5c..f4fadbb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
@@ -14,6 +14,8 @@
package org.apache.hadoop.hive.ql.io.parquet;
import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -34,7 +36,8 @@ import org.apache.parquet.hadoop.ParquetInputFormat;
* NOTE: With HIVE-9235 we removed "implements VectorizedParquetInputFormat" since all data types
* are not currently supported. Removing the interface turns off vectorization.
*/
-public class MapredParquetInputFormat extends FileInputFormat<NullWritable, ArrayWritable> {
+public class MapredParquetInputFormat extends FileInputFormat<NullWritable, ArrayWritable>
+ implements VectorizedInputFormatInterface {
private static final Logger LOG = LoggerFactory.getLogger(MapredParquetInputFormat.class);
@@ -48,7 +51,7 @@ public class MapredParquetInputFormat extends FileInputFormat<NullWritable, Arra
protected MapredParquetInputFormat(final ParquetInputFormat<ArrayWritable> inputFormat) {
this.realInput = inputFormat;
- vectorizedSelf = new VectorizedParquetInputFormat(inputFormat);
+ vectorizedSelf = new VectorizedParquetInputFormat();
}
@SuppressWarnings({ "unchecked", "rawtypes" })
@@ -69,8 +72,7 @@ public class MapredParquetInputFormat extends FileInputFormat<NullWritable, Arra
if (LOG.isDebugEnabled()) {
LOG.debug("Using row-mode record reader");
}
- return (RecordReader<NullWritable, ArrayWritable>)
- new ParquetRecordReaderWrapper(realInput, split, job, reporter);
+ return new ParquetRecordReaderWrapper(realInput, split, job, reporter);
}
} catch (final InterruptedException e) {
throw new RuntimeException("Cannot create a RecordReaderWrapper", e);
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
new file mode 100644
index 0000000..167f9b6
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
@@ -0,0 +1,171 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import com.google.common.base.Strings;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
+import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.parquet.filter2.compat.FilterCompat;
+import org.apache.parquet.filter2.compat.RowGroupFilter;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.ParquetInputFormat;
+import org.apache.parquet.hadoop.ParquetInputSplit;
+import org.apache.parquet.hadoop.api.InitContext;
+import org.apache.parquet.hadoop.api.ReadSupport;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.FileMetaData;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class ParquetRecordReaderBase {
+ public static final Logger LOG = LoggerFactory.getLogger(ParquetRecordReaderBase.class);
+
+ protected Path file;
+ protected ProjectionPusher projectionPusher;
+ protected boolean skipTimestampConversion = false;
+ protected SerDeStats serDeStats;
+ protected JobConf jobConf;
+
+ protected int schemaSize;
+ protected List<BlockMetaData> filtedBlocks;
+ protected ParquetFileReader reader;
+
+ /**
+ * gets a ParquetInputSplit corresponding to a split given by Hive
+ *
+ * @param oldSplit The split given by Hive
+ * @param conf The JobConf of the Hive job
+ * @return a ParquetInputSplit corresponding to the oldSplit
+ * @throws IOException if the config cannot be enhanced or if the footer cannot be read from the file
+ */
+ @SuppressWarnings("deprecation")
+ protected ParquetInputSplit getSplit(
+ final org.apache.hadoop.mapred.InputSplit oldSplit,
+ final JobConf conf
+ ) throws IOException {
+ ParquetInputSplit split;
+ if (oldSplit instanceof FileSplit) {
+ final Path finalPath = ((FileSplit) oldSplit).getPath();
+ jobConf = projectionPusher.pushProjectionsAndFilters(conf, finalPath.getParent());
+
+ // TODO enable MetadataFilter by using readFooter(Configuration configuration, Path file,
+ // MetadataFilter filter) API
+ final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath);
+ final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
+ final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
+
+ final ReadSupport.ReadContext
+ readContext = new DataWritableReadSupport().init(new InitContext(jobConf,
+ null, fileMetaData.getSchema()));
+
+ // Compute stats
+ for (BlockMetaData bmd : blocks) {
+ serDeStats.setRowCount(serDeStats.getRowCount() + bmd.getRowCount());
+ serDeStats.setRawDataSize(serDeStats.getRawDataSize() + bmd.getTotalByteSize());
+ }
+
+ schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata()
+ .get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)).getFieldCount();
+ final List<BlockMetaData> splitGroup = new ArrayList<BlockMetaData>();
+ final long splitStart = ((FileSplit) oldSplit).getStart();
+ final long splitLength = ((FileSplit) oldSplit).getLength();
+ for (final BlockMetaData block : blocks) {
+ final long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
+ if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) {
+ splitGroup.add(block);
+ }
+ }
+ if (splitGroup.isEmpty()) {
+ LOG.warn("Skipping split, could not find row group in: " + oldSplit);
+ return null;
+ }
+
+ FilterCompat.Filter filter = setFilter(jobConf, fileMetaData.getSchema());
+ if (filter != null) {
+ filtedBlocks = RowGroupFilter.filterRowGroups(filter, splitGroup, fileMetaData.getSchema());
+ if (filtedBlocks.isEmpty()) {
+ LOG.debug("All row groups are dropped due to filter predicates");
+ return null;
+ }
+
+ long droppedBlocks = splitGroup.size() - filtedBlocks.size();
+ if (droppedBlocks > 0) {
+ LOG.debug("Dropping " + droppedBlocks + " row groups that do not pass filter predicate");
+ }
+ } else {
+ filtedBlocks = splitGroup;
+ }
+
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) {
+ skipTimestampConversion = !Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr");
+ }
+ split = new ParquetInputSplit(finalPath,
+ splitStart,
+ splitLength,
+ oldSplit.getLocations(),
+ filtedBlocks,
+ readContext.getRequestedSchema().toString(),
+ fileMetaData.getSchema().toString(),
+ fileMetaData.getKeyValueMetaData(),
+ readContext.getReadSupportMetadata());
+ return split;
+ } else {
+ throw new IllegalArgumentException("Unknown split type: " + oldSplit);
+ }
+ }
+
+ public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) {
+ SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf);
+ if (sarg == null) {
+ return null;
+ }
+
+ // Create the Parquet FilterPredicate without including columns that do not exist
+ // on the schema (such as partition columns).
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
+ if (p != null) {
+ // Filter may have sensitive information. Do not send to debug.
+ LOG.debug("PARQUET predicate push down generated.");
+ ParquetInputFormat.setFilterPredicate(conf, p);
+ return FilterCompat.get(p);
+ } else {
+ // Filter may have sensitive information. Do not send to debug.
+ LOG.debug("No PARQUET predicate push down is generated.");
+ return null;
+ }
+ }
+
+ public List<BlockMetaData> getFiltedBlocks() {
+ return filtedBlocks;
+ }
+
+ public SerDeStats getStats() {
+ return serDeStats;
+ }
+}
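
Most of the logic hoisted into this new base class is row-group selection: getSplit() keeps a row group only if the offset of its first data page falls inside [splitStart, splitStart + splitLength), and the surviving groups can then be thinned further by the SARG-derived Parquet filter from setFilter(). A minimal, self-contained sketch of that offset test, using plain arrays and hypothetical names instead of BlockMetaData:

import java.util.ArrayList;
import java.util.List;

/** Illustrative sketch: keep the row groups whose first data page starts inside the split. */
final class RowGroupSelectionSketch {
  /**
   * @param firstDataPageOffsets byte offset of the first data page of each row group, in file order
   * @param splitStart           byte offset where the Hive split begins
   * @param splitLength          length of the Hive split in bytes
   * @return indexes of the row groups this split should read
   */
  static List<Integer> select(long[] firstDataPageOffsets, long splitStart, long splitLength) {
    List<Integer> selected = new ArrayList<>();
    long splitEnd = splitStart + splitLength;
    for (int i = 0; i < firstDataPageOffsets.length; i++) {
      long firstDataPage = firstDataPageOffsets[i];
      // Same half-open test as getSplit(): start inclusive, end exclusive.
      if (firstDataPage >= splitStart && firstDataPage < splitEnd) {
        selected.add(i);
      }
    }
    return selected;
  }

  public static void main(String[] args) {
    long mb = 1024L * 1024L;
    // Row groups starting at 0, 128MB and 256MB; a split covering [128MB, 256MB) keeps only the middle one.
    System.out.println(select(new long[] {0, 128 * mb, 256 * mb}, 128 * mb, 128 * mb));   // prints [1]
  }
}

Selecting by the first data page offset assigns each row group to exactly one split even when row groups straddle split boundaries, which is why an empty selection is a legitimate outcome that getSplit() answers with null.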
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
index 2072533..322178a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
@@ -15,147 +15,29 @@ package org.apache.hadoop.hive.ql.io.parquet;
import java.io.IOException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssign;
-import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssignFactory;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
+import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader;
+import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
-import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.MapWork;
-import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
-import org.apache.parquet.hadoop.ParquetInputFormat;
/**
* Vectorized input format for Parquet files
*/
-public class VectorizedParquetInputFormat extends FileInputFormat<NullWritable, VectorizedRowBatch>
- implements VectorizedInputFormatInterface {
-
- private static final Logger LOG = LoggerFactory.getLogger(VectorizedParquetInputFormat.class);
-
- /**
- * Vectorized record reader for vectorized Parquet input format
- */
- private static class VectorizedParquetRecordReader implements
- RecordReader<NullWritable, VectorizedRowBatch> {
- private static final Logger LOG = LoggerFactory.getLogger(VectorizedParquetRecordReader.class);
-
- private final ParquetRecordReaderWrapper internalReader;
- private VectorizedRowBatchCtx rbCtx;
- private Object[] partitionValues;
- private ArrayWritable internalValues;
- private NullWritable internalKey;
- private VectorColumnAssign[] assigners;
-
- public VectorizedParquetRecordReader(
- ParquetInputFormat<ArrayWritable> realInput,
- FileSplit split,
- JobConf conf, Reporter reporter) throws IOException, InterruptedException {
- internalReader = new ParquetRecordReaderWrapper(
- realInput,
- split,
- conf,
- reporter);
- rbCtx = Utilities.getVectorizedRowBatchCtx(conf);
- int partitionColumnCount = rbCtx.getPartitionColumnCount();
- if (partitionColumnCount > 0) {
- partitionValues = new Object[partitionColumnCount];
- rbCtx.getPartitionValues(rbCtx, conf, split, partitionValues);
- }
- }
-
- @Override
- public NullWritable createKey() {
- internalKey = internalReader.createKey();
- return NullWritable.get();
- }
-
- @Override
- public VectorizedRowBatch createValue() {
- VectorizedRowBatch outputBatch;
- outputBatch = rbCtx.createVectorizedRowBatch();
- internalValues = internalReader.createValue();
- return outputBatch;
- }
-
- @Override
- public long getPos() throws IOException {
- return internalReader.getPos();
- }
+public class VectorizedParquetInputFormat
+ extends FileInputFormat<NullWritable, VectorizedRowBatch> {
- @Override
- public void close() throws IOException {
- internalReader.close();
- }
-
- @Override
- public float getProgress() throws IOException {
- return internalReader.getProgress();
- }
-
- @Override
- public boolean next(NullWritable key, VectorizedRowBatch outputBatch)
- throws IOException {
- if (assigners != null) {
- assert(outputBatch.numCols == assigners.length);
- }
- outputBatch.reset();
- int maxSize = outputBatch.getMaxSize();
- try {
- while (outputBatch.size < maxSize) {
- if (false == internalReader.next(internalKey, internalValues)) {
- outputBatch.endOfFile = true;
- break;
- }
- Writable[] writables = internalValues.get();
-
- if (null == assigners) {
- // Normally we'd build the assigners from the rbCtx.rowOI, but with Parquet
- // we have a discrepancy between the metadata type (Eg. tinyint -> BYTE) and
- // the writable value (IntWritable). see Parquet's ETypeConverter class.
- assigners = VectorColumnAssignFactory.buildAssigners(outputBatch, writables);
- }
-
- for(int i = 0; i < writables.length; ++i) {
- assigners[i].assignObjectValue(writables[i], outputBatch.size);
- }
- ++outputBatch.size;
- }
- } catch (HiveException e) {
- throw new RuntimeException(e);
- }
- return outputBatch.size > 0;
- }
+ public VectorizedParquetInputFormat() {
}
- private final ParquetInputFormat<ArrayWritable> realInput;
-
- public VectorizedParquetInputFormat(ParquetInputFormat<ArrayWritable> realInput) {
- this.realInput = realInput;
- }
-
- @SuppressWarnings("unchecked")
@Override
public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(
- InputSplit split, JobConf conf, Reporter reporter) throws IOException {
- try {
- return (RecordReader<NullWritable, VectorizedRowBatch>)
- new VectorizedParquetRecordReader(realInput, (FileSplit) split, conf, reporter);
- } catch (final InterruptedException e) {
- throw new RuntimeException("Cannot create a VectorizedParquetRecordReader", e);
- }
+ InputSplit inputSplit,
+ JobConf jobConf,
+ Reporter reporter) throws IOException {
+ return new VectorizedParquetRecordReader(inputSplit, jobConf);
}
-
}
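
With this rewrite the input format hands back the new page-level VectorizedParquetRecordReader directly; the deleted inner class had instead wrapped the row-mode ParquetRecordReaderWrapper and copied rows one at a time into a VectorizedRowBatch via VectorColumnAssign. The outer contract is the same either way: next() fills a batch up to its maximum size, marks end-of-file once the input is exhausted, and reports whether the batch holds any rows. A stripped-down sketch of that contract, using hypothetical types rather than VectorizedRowBatch:

import java.util.Iterator;

/** Illustrative sketch of the batch-filling contract shared by the old and new readers. */
final class BatchFillSketch {
  /** A single long column standing in for VectorizedRowBatch. */
  static final class Batch {
    final int maxSize;
    final long[] col0;
    int size;
    boolean endOfFile;
    Batch(int maxSize) { this.maxSize = maxSize; this.col0 = new long[maxSize]; }
  }

  /** Returns true when the batch holds at least one row, as next(key, outputBatch) does above. */
  static boolean next(Iterator<Long> rows, Batch batch) {
    batch.size = 0;
    while (batch.size < batch.maxSize) {
      if (!rows.hasNext()) {
        batch.endOfFile = true;      // stop asking for more batches after this one
        break;
      }
      batch.col0[batch.size] = rows.next();
      batch.size++;
    }
    return batch.size > 0;
  }
}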
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
index 8d8b0c5..16064b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
@@ -70,7 +70,7 @@ public class DataWritableReadSupport extends ReadSupport<ArrayWritable> {
* @param columns comma separated list of columns
* @return list with virtual columns removed
*/
- private static List<String> getColumnNames(final String columns) {
+ public static List<String> getColumnNames(final String columns) {
return (List<String>) VirtualColumn.
removeVirtualColumns(StringUtils.getStringCollection(columns));
}
@@ -82,7 +82,7 @@ public class DataWritableReadSupport extends ReadSupport<ArrayWritable> {
* @param types Comma separated list of types
* @return A list of TypeInfo objects.
*/
- private static List<TypeInfo> getColumnTypes(final String types) {
+ public static List<TypeInfo> getColumnTypes(final String types) {
return TypeInfoUtils.getTypeInfosFromTypeString(types);
}
@@ -177,7 +177,7 @@ public class DataWritableReadSupport extends ReadSupport<ArrayWritable> {
* @param colTypes List of column types.
* @return A MessageType object of projected columns.
*/
- private static MessageType getSchemaByName(MessageType schema, List<String> colNames, List<TypeInfo> colTypes) {
+ public static MessageType getSchemaByName(MessageType schema, List<String> colNames, List<TypeInfo> colTypes) {
List<Type> projectedFields = getProjectedGroupFields(schema, colNames, colTypes);
Type[] typesArray = projectedFields.toArray(new Type[0]);
@@ -195,7 +195,7 @@ public class DataWritableReadSupport extends ReadSupport<ArrayWritable> {
* @param colIndexes List of column indexes.
* @return A MessageType object of the column names found.
*/
- private static MessageType getSchemaByIndex(MessageType schema, List<String> colNames, List<Integer> colIndexes) {
+ public static MessageType getSchemaByIndex(MessageType schema, List<String> colNames, List<Integer> colIndexes) {
List<Type> schemaTypes = new ArrayList<Type>();
for (Integer i : colIndexes) {
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
index d2e1b13..ac430a6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
@@ -14,24 +14,19 @@
package org.apache.hadoop.hive.ql.io.parquet.read;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import org.apache.hadoop.hive.ql.io.parquet.ParquetRecordReaderBase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
-import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
@@ -39,25 +34,12 @@ import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.parquet.filter2.compat.FilterCompat;
-import org.apache.parquet.filter2.compat.RowGroupFilter;
-import org.apache.parquet.filter2.predicate.FilterPredicate;
-import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.ParquetInputFormat;
import org.apache.parquet.hadoop.ParquetInputSplit;
-import org.apache.parquet.hadoop.api.InitContext;
-import org.apache.parquet.hadoop.api.ReadSupport.ReadContext;
-import org.apache.parquet.hadoop.metadata.BlockMetaData;
-import org.apache.parquet.hadoop.metadata.FileMetaData;
-import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.util.ContextUtil;
-import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.MessageTypeParser;
-import com.google.common.base.Strings;
-
-public class ParquetRecordReaderWrapper implements RecordReader<NullWritable, ArrayWritable>,
- StatsProvidingRecordReader {
+public class ParquetRecordReaderWrapper extends ParquetRecordReaderBase
+ implements RecordReader<NullWritable, ArrayWritable>, StatsProvidingRecordReader {
public static final Logger LOG = LoggerFactory.getLogger(ParquetRecordReaderWrapper.class);
private final long splitLen; // for getPos()
@@ -68,12 +50,6 @@ public class ParquetRecordReaderWrapper implements RecordReader<NullWritable, Ar
private ArrayWritable valueObj = null;
private boolean firstRecord = false;
private boolean eof = false;
- private int schemaSize;
- private boolean skipTimestampConversion = false;
- private JobConf jobConf;
- private final ProjectionPusher projectionPusher;
- private List<BlockMetaData> filtedBlocks;
- private final SerDeStats serDeStats;
public ParquetRecordReaderWrapper(
final ParquetInputFormat<ArrayWritable> newInputFormat,
@@ -137,27 +113,6 @@ public class ParquetRecordReaderWrapper implements RecordReader<NullWritable, Ar
}
}
- public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) {
- SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf);
- if (sarg == null) {
- return null;
- }
-
- // Create the Parquet FilterPredicate without including columns that do not exist
- // on the shema (such as partition columns).
- FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
- if (p != null) {
- // Filter may have sensitive information. Do not send to debug.
- LOG.debug("PARQUET predicate push down generated.");
- ParquetInputFormat.setFilterPredicate(conf, p);
- return FilterCompat.get(p);
- } else {
- // Filter may have sensitive information. Do not send to debug.
- LOG.debug("No PARQUET predicate push down is generated.");
- return null;
- }
- }
-
@Override
public void close() throws IOException {
if (realReader != null) {
@@ -227,94 +182,4 @@ public class ParquetRecordReaderWrapper implements RecordReader<NullWritable, Ar
throw new IOException(e);
}
}
-
- /**
- * gets a ParquetInputSplit corresponding to a split given by Hive
- *
- * @param oldSplit The split given by Hive
- * @param conf The JobConf of the Hive job
- * @return a ParquetInputSplit corresponding to the oldSplit
- * @throws IOException if the config cannot be enhanced or if the footer cannot be read from the file
- */
- @SuppressWarnings("deprecation")
- protected ParquetInputSplit getSplit(
- final InputSplit oldSplit,
- final JobConf conf
- ) throws IOException {
- ParquetInputSplit split;
- if (oldSplit instanceof FileSplit) {
- final Path finalPath = ((FileSplit) oldSplit).getPath();
- jobConf = projectionPusher.pushProjectionsAndFilters(conf, finalPath.getParent());
-
- final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath);
- final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
- final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
-
- final ReadContext readContext = new DataWritableReadSupport().init(new InitContext(jobConf,
- null, fileMetaData.getSchema()));
-
- // Compute stats
- for (BlockMetaData bmd : blocks) {
- serDeStats.setRowCount(serDeStats.getRowCount() + bmd.getRowCount());
- serDeStats.setRawDataSize(serDeStats.getRawDataSize() + bmd.getTotalByteSize());
- }
-
- schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata()
- .get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)).getFieldCount();
- final List<BlockMetaData> splitGroup = new ArrayList<BlockMetaData>();
- final long splitStart = ((FileSplit) oldSplit).getStart();
- final long splitLength = ((FileSplit) oldSplit).getLength();
- for (final BlockMetaData block : blocks) {
- final long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
- if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) {
- splitGroup.add(block);
- }
- }
- if (splitGroup.isEmpty()) {
- LOG.warn("Skipping split, could not find row group in: " + (FileSplit) oldSplit);
- return null;
- }
-
- FilterCompat.Filter filter = setFilter(jobConf, fileMetaData.getSchema());
- if (filter != null) {
- filtedBlocks = RowGroupFilter.filterRowGroups(filter, splitGroup, fileMetaData.getSchema());
- if (filtedBlocks.isEmpty()) {
- LOG.debug("All row groups are dropped due to filter predicates");
- return null;
- }
-
- long droppedBlocks = splitGroup.size() - filtedBlocks.size();
- if (droppedBlocks > 0) {
- LOG.debug("Dropping " + droppedBlocks + " row groups that do not pass filter predicate");
- }
- } else {
- filtedBlocks = splitGroup;
- }
-
- if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) {
- skipTimestampConversion = !Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr");
- }
- split = new ParquetInputSplit(finalPath,
- splitStart,
- splitLength,
- ((FileSplit) oldSplit).getLocations(),
- filtedBlocks,
- readContext.getRequestedSchema().toString(),
- fileMetaData.getSchema().toString(),
- fileMetaData.getKeyValueMetaData(),
- readContext.getReadSupportMetadata());
- return split;
- } else {
- throw new IllegalArgumentException("Unknown split type: " + oldSplit);
- }
- }
-
- public List<BlockMetaData> getFiltedBlocks() {
- return filtedBlocks;
- }
-
- @Override
- public SerDeStats getStats() {
- return serDeStats;
- }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
index aace48e..3fd75d2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
@@ -49,7 +49,7 @@ public class NanoTimeUtils {
return parquetLocalCalendar.get();
}
- private static Calendar getCalendar(boolean skipConversion) {
+ public static Calendar getCalendar(boolean skipConversion) {
Calendar calendar = skipConversion ? getLocalCalendar() : getGMTCalendar();
calendar.clear(); // Reset all fields before reusing this instance
return calendar;
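
getCalendar(boolean) goes from private to public so the vectorized path can reuse the same calendar selection when it converts INT96 values (see the VectorizedColumnReader file further down). As getSplit() above shows, skipTimestampConversion is only set when HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION is enabled and the file was not written by parquet-mr. A minimal sketch of the choice the method makes, assuming only that getLocalCalendar()/getGMTCalendar() return a JVM-local and a GMT calendar respectively:

import java.util.Calendar;
import java.util.TimeZone;

/** Illustrative sketch of the local-vs-GMT choice made by getCalendar(skipConversion). */
final class CalendarChoiceSketch {
  static Calendar pick(boolean skipConversion) {
    Calendar calendar = skipConversion
        ? Calendar.getInstance()                               // JVM-local calendar: leave the value as written
        : Calendar.getInstance(TimeZone.getTimeZone("GMT"));   // GMT calendar: convert while reconstructing
    calendar.clear();   // reset all fields before reuse, as the real method does
    return calendar;
  }
}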
http://git-wip-us.apache.org/repos/asf/hive/blob/936df7a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedColumnReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedColumnReader.java
new file mode 100644
index 0000000..5a9c7f9
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedColumnReader.java
@@ -0,0 +1,571 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.vector;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.Dictionary;
+import org.apache.parquet.column.Encoding;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.DataPageV1;
+import org.apache.parquet.column.page.DataPageV2;
+import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.column.values.ValuesReader;
+import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder;
+import org.apache.parquet.io.ParquetDecodingException;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.Type;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.sql.Timestamp;
+import java.util.Arrays;
+
+import static org.apache.parquet.column.ValuesType.DEFINITION_LEVEL;
+import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL;
+import static org.apache.parquet.column.ValuesType.VALUES;
+
+/**
+ * A column-level Parquet reader used to read a batch of records for a single column;
+ * parts of the code are adapted from Apache Spark and Apache Parquet.
+ */
+public class VectorizedColumnReader {
+
+ private static final Logger LOG = LoggerFactory.getLogger(VectorizedColumnReader.class);
+
+ private boolean skipTimestampConversion = false;
+
+ /**
+ * Total number of values read.
+ */
+ private long valuesRead;
+
+ /**
+ * value that indicates the end of the current page. That is,
+ * if valuesRead == endOfPageValueCount, we are at the end of the page.
+ */
+ private long endOfPageValueCount;
+
+ /**
+ * The dictionary, if this column has dictionary encoding.
+ */
+ private final Dictionary dictionary;
+
+ /**
+ * If true, the current page is dictionary encoded.
+ */
+ private boolean isCurrentPageDictionaryEncoded;
+
+ /**
+ * Maximum definition level for this column.
+ */
+ private final int maxDefLevel;
+
+ private int definitionLevel;
+ private int repetitionLevel;
+
+ /**
+ * Repetition/Definition/Value readers.
+ */
+ private IntIterator repetitionLevelColumn;
+ private IntIterator definitionLevelColumn;
+ private ValuesReader dataColumn;
+
+ /**
+ * Total values in the current page.
+ */
+ private int pageValueCount;
+
+ private final PageReader pageReader;
+ private final ColumnDescriptor descriptor;
+ private final Type type;
+
+ public VectorizedColumnReader(
+ ColumnDescriptor descriptor,
+ PageReader pageReader,
+ boolean skipTimestampConversion,
+ Type type) throws IOException {
+ this.descriptor = descriptor;
+ this.type = type;
+ this.pageReader = pageReader;
+ this.maxDefLevel = descriptor.getMaxDefinitionLevel();
+ this.skipTimestampConversion = skipTimestampConversion;
+
+ DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
+ if (dictionaryPage != null) {
+ try {
+ this.dictionary = dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage);
+ this.isCurrentPageDictionaryEncoded = true;
+ } catch (IOException e) {
+ throw new IOException("could not decode the dictionary for " + descriptor, e);
+ }
+ } else {
+ this.dictionary = null;
+ this.isCurrentPageDictionaryEncoded = false;
+ }
+ }
+
+ void readBatch(
+ int total,
+ ColumnVector column,
+ TypeInfo columnType) throws IOException {
+
+ int rowId = 0;
+ while (total > 0) {
+ // Compute the number of values we want to read in this page.
+ int leftInPage = (int) (endOfPageValueCount - valuesRead);
+ if (leftInPage == 0) {
+ readPage();
+ leftInPage = (int) (endOfPageValueCount - valuesRead);
+ }
+
+ int num = Math.min(total, leftInPage);
+ if (isCurrentPageDictionaryEncoded) {
+ LongColumnVector dictionaryIds = new LongColumnVector();
+ // Read and decode dictionary ids.
+ readDictionaryIDs(num, dictionaryIds, rowId);
+ decodeDictionaryIds(rowId, num, column, dictionaryIds);
+ } else {
+ // assign values in vector
+ PrimitiveTypeInfo primitiveColumnType = (PrimitiveTypeInfo) columnType;
+ switch (primitiveColumnType.getPrimitiveCategory()) {
+ case INT:
+ case BYTE:
+ case SHORT:
+ readIntegers(num, (LongColumnVector) column, rowId);
+ break;
+ case DATE:
+ case INTERVAL_YEAR_MONTH:
+ case LONG:
+ readLongs(num, (LongColumnVector) column, rowId);
+ break;
+ case BOOLEAN:
+ readBooleans(num, (LongColumnVector) column, rowId);
+ break;
+ case DOUBLE:
+ readDoubles(num, (DoubleColumnVector) column, rowId);
+ break;
+ case BINARY:
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ readBinaries(num, (BytesColumnVector) column, rowId);
+ break;
+ case FLOAT:
+ readFloats(num, (DoubleColumnVector) column, rowId);
+ break;
+ case DECIMAL:
+ readDecimal(num, (DecimalColumnVector) column, rowId);
+ break;
+ case INTERVAL_DAY_TIME:
+ case TIMESTAMP:
+ default:
+ throw new IOException(
+ "Unsupported type category: " + primitiveColumnType.getPrimitiveCategory());
+ }
+ }
+ rowId += num;
+ total -= num;
+ }
+ }
+
+ private void readDictionaryIDs(
+ int total,
+ LongColumnVector c,
+ int rowId) throws IOException {
+ int left = total;
+ while (left > 0) {
+ readRepetitionAndDefinitionLevels();
+ if (definitionLevel >= maxDefLevel) {
+ c.vector[rowId] = dataColumn.readValueDictionaryId();
+ c.isNull[rowId] = false;
+ c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]);
+ } else {
+ c.isNull[rowId] = true;
+ c.isRepeating = false;
+ c.noNulls = false;
+ }
+ rowId++;
+ left--;
+ }
+ }
+
+ private void readIntegers(
+ int total,
+ LongColumnVector c,
+ int rowId) throws IOException {
+ int left = total;
+ while (left > 0) {
+ readRepetitionAndDefinitionLevels();
+ if (definitionLevel >= maxDefLevel) {
+ c.vector[rowId] = dataColumn.readInteger();
+ c.isNull[rowId] = false;
+ c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]);
+ } else {
+ c.isNull[rowId] = true;
+ c.isRepeating = false;
+ c.noNulls = false;
+ }
+ rowId++;
+ left--;
+ }
+ }
+
+ private void readDoubles(
+ int total,
+ DoubleColumnVector c,
+ int rowId) throws IOException {
+ int left = total;
+ while (left > 0) {
+ readRepetitionAndDefinitionLevels();
+ if (definitionLevel >= maxDefLevel) {
+ c.vector[rowId] = dataColumn.readDouble();
+ c.isNull[rowId] = false;
+ c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]);
+ } else {
+ c.isNull[rowId] = true;
+ c.isRepeating = false;
+ c.noNulls = false;
+ }
+ rowId++;
+ left--;
+ }
+ }
+
+ private void readBooleans(
+ int total,
+ LongColumnVector c,
+ int rowId) throws IOException {
+ int left = total;
+ while (left > 0) {
+ readRepetitionAndDefinitionLevels();
+ if (definitionLevel >= maxDefLevel) {
+ c.vector[rowId] = dataColumn.readBoolean() ? 1 : 0;
+ c.isNull[rowId] = false;
+ c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]);
+ } else {
+ c.isNull[rowId] = true;
+ c.isRepeating = false;
+ c.noNulls = false;
+ }
+ rowId++;
+ left--;
+ }
+ }
+
+ private void readLongs(
+ int total,
+ LongColumnVector c,
+ int rowId) throws IOException {
+ int left = total;
+ while (left > 0) {
+ readRepetitionAndDefinitionLevels();
+ if (definitionLevel >= maxDefLevel) {
+ c.vector[rowId] = dataColumn.readLong();
+ c.isNull[rowId] = false;
+ c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]);
+ } else {
+ c.isNull[rowId] = true;
+ c.isRepeating = false;
+ c.noNulls = false;
+ }
+ rowId++;
+ left--;
+ }
+ }
+
+ private void readFloats(
+ int total,
+ DoubleColumnVector c,
+ int rowId) throws IOException {
+ int left = total;
+ while (left > 0) {
+ readRepetitionAndDefinitionLevels();
+ if (definitionLevel >= maxDefLevel) {
+ c.vector[rowId] = dataColumn.readFloat();
+ c.isNull[rowId] = false;
+ c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]);
+ } else {
+ c.isNull[rowId] = true;
+ c.isRepeating = false;
+ c.noNulls = false;
+ }
+ rowId++;
+ left--;
+ }
+ }
+
+ private void readDecimal(
+ int total,
+ DecimalColumnVector c,
+ int rowId) throws IOException {
+ int left = total;
+ c.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision();
+ c.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale();
+ while (left > 0) {
+ readRepetitionAndDefinitionLevels();
+ if (definitionLevel >= maxDefLevel) {
+ c.vector[rowId].set(dataColumn.readBytes().getBytesUnsafe(), c.scale);
+ c.isNull[rowId] = false;
+ c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]);
+ } else {
+ c.isNull[rowId] = true;
+ c.isRepeating = false;
+ c.noNulls = false;
+ }
+ rowId++;
+ left--;
+ }
+ }
+
+ private void readBinaries(
+ int total,
+ BytesColumnVector c,
+ int rowId) throws IOException {
+ int left = total;
+ while (left > 0) {
+ readRepetitionAndDefinitionLevels();
+ if (definitionLevel >= maxDefLevel) {
+ c.setVal(rowId, dataColumn.readBytes().getBytesUnsafe());
+ c.isNull[rowId] = false;
+ // TODO figure out a better way to set repeat for Binary type
+ c.isRepeating = false;
+ } else {
+ c.isNull[rowId] = true;
+ c.isRepeating = false;
+ c.noNulls = false;
+ }
+ rowId++;
+ left--;
+ }
+ }
+
+ /**
+ * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
+ */
+ private void decodeDictionaryIds(int rowId, int num, ColumnVector column,
+ LongColumnVector dictionaryIds) {
+ System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
+ if (column.noNulls) {
+ column.noNulls = dictionaryIds.noNulls;
+ }
+ column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
+
+ switch (descriptor.getType()) {
+ case INT32:
+ for (int i = rowId; i < rowId + num; ++i) {
+ ((LongColumnVector) column).vector[i] =
+ dictionary.decodeToInt((int) dictionaryIds.vector[i]);
+ }
+ break;
+ case INT64:
+ for (int i = rowId; i < rowId + num; ++i) {
+ ((LongColumnVector) column).vector[i] =
+ dictionary.decodeToLong((int) dictionaryIds.vector[i]);
+ }
+ break;
+ case FLOAT:
+ for (int i = rowId; i < rowId + num; ++i) {
+ ((DoubleColumnVector) column).vector[i] =
+ dictionary.decodeToFloat((int) dictionaryIds.vector[i]);
+ }
+ break;
+ case DOUBLE:
+ for (int i = rowId; i < rowId + num; ++i) {
+ ((DoubleColumnVector) column).vector[i] =
+ dictionary.decodeToDouble((int) dictionaryIds.vector[i]);
+ }
+ break;
+ case INT96:
+ for (int i = rowId; i < rowId + num; ++i) {
+ ByteBuffer buf = dictionary.decodeToBinary((int) dictionaryIds.vector[i]).toByteBuffer();
+ buf.order(ByteOrder.LITTLE_ENDIAN);
+ long timeOfDayNanos = buf.getLong();
+ int julianDay = buf.getInt();
+ NanoTime nt = new NanoTime(julianDay, timeOfDayNanos);
+ Timestamp ts = NanoTimeUtils.getTimestamp(nt, skipTimestampConversion);
+ ((TimestampColumnVector) column).set(i, ts);
+ }
+ break;
+ case BINARY:
+ case FIXED_LEN_BYTE_ARRAY:
+ for (int i = rowId; i < rowId + num; ++i) {
+ ((BytesColumnVector) column)
+ .setVal(i, dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe());
+ }
+ break;
+ default:
+ throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType());
+ }
+ }
+
+ private void readRepetitionAndDefinitionLevels() {
+ repetitionLevel = repetitionLevelColumn.nextInt();
+ definitionLevel = definitionLevelColumn.nextInt();
+ valuesRead++;
+ }
+
+ private void readPage() throws IOException {
+ DataPage page = pageReader.readPage();
+ // TODO: Why is this a visitor?
+ page.accept(new DataPage.Visitor<Void>() {
+ @Override
+ public Void visit(DataPageV1 dataPageV1) {
+ readPageV1(dataPageV1);
+ return null;
+ }
+
+ @Override
+ public Void visit(DataPageV2 dataPageV2) {
+ readPageV2(dataPageV2);
+ return null;
+ }
+ });
+ }
+
+ private void initDataReader(Encoding dataEncoding, byte[] bytes, int offset, int valueCount) throws IOException {
+ this.pageValueCount = valueCount;
+ this.endOfPageValueCount = valuesRead + pageValueCount;
+ if (dataEncoding.usesDictionary()) {
+ this.dataColumn = null;
+ if (dictionary == null) {
+ throw new IOException(
+ "could not read page in col " + descriptor +
+ " as the dictionary was missing for encoding " + dataEncoding);
+ }
+ dataColumn = dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary);
+ this.isCurrentPageDictionaryEncoded = true;
+ } else {
+ if (dataEncoding != Encoding.PLAIN) {
+ throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding);
+ }
+ dataColumn = dataEncoding.getValuesReader(descriptor, VALUES);
+ this.isCurrentPageDictionaryEncoded = false;
+ }
+
+ try {
+ dataColumn.initFromPage(pageValueCount, bytes, offset);
+ } catch (IOException e) {
+ throw new IOException("could not read page in col " + descriptor, e);
+ }
+ }
+
+ private void readPageV1(DataPageV1 page) {
+ ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
+ ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
+ this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
+ this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
+ try {
+ byte[] bytes = page.getBytes().toByteArray();
+ LOG.debug("page size " + bytes.length + " bytes and " + pageValueCount + " records");
+ LOG.debug("reading repetition levels at 0");
+ rlReader.initFromPage(pageValueCount, bytes, 0);
+ int next = rlReader.getNextOffset();
+ LOG.debug("reading definition levels at " + next);
+ dlReader.initFromPage(pageValueCount, bytes, next);
+ next = dlReader.getNextOffset();
+ LOG.debug("reading data at " + next);
+ initDataReader(page.getValueEncoding(), bytes, next, page.getValueCount());
+ } catch (IOException e) {
+ throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
+ }
+ }
+
+ private void readPageV2(DataPageV2 page) {
+ this.pageValueCount = page.getValueCount();
+ this.repetitionLevelColumn = newRLEIterator(descriptor.getMaxRepetitionLevel(),
+ page.getRepetitionLevels());
+ this.definitionLevelColumn = newRLEIterator(descriptor.getMaxDefinitionLevel(), page.getDefinitionLevels());
+ try {
+ LOG.debug("page data size " + page.getData().size() + " bytes and " + pageValueCount + " records");
+ initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0, page.getValueCount());
+ } catch (IOException e) {
+ throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
+ }
+ }
+
+ private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
+ try {
+ if (maxLevel == 0) {
+ return new NullIntIterator();
+ }
+ return new RLEIntIterator(
+ new RunLengthBitPackingHybridDecoder(
+ BytesUtils.getWidthFromMaxInt(maxLevel),
+ new ByteArrayInputStream(bytes.toByteArray())));
+ } catch (IOException e) {
+ throw new ParquetDecodingException("could not read levels in page for col " + descriptor, e);
+ }
+ }
+
+ /**
+ * Utility classes to abstract over the different ways to read ints with different encodings.
+ * TODO: remove this layer of abstraction?
+ */
+ abstract static class IntIterator {
+ abstract int nextInt();
+ }
+
+ protected static final class ValuesReaderIntIterator extends IntIterator {
+ ValuesReader delegate;
+
+ public ValuesReaderIntIterator(ValuesReader delegate) {
+ this.delegate = delegate;
+ }
+
+ @Override
+ int nextInt() {
+ return delegate.readInteger();
+ }
+ }
+
+ protected static final class RLEIntIterator extends IntIterator {
+ RunLengthBitPackingHybridDecoder delegate;
+
+ public RLEIntIterator(RunLengthBitPackingHybridDecoder delegate) {
+ this.delegate = delegate;
+ }
+
+ @Override
+ int nextInt() {
+ try {
+ return delegate.readInt();
+ } catch (IOException e) {
+ throw new ParquetDecodingException(e);
+ }
+ }
+ }
+
+ protected static final class NullIntIterator extends IntIterator {
+ @Override
+ int nextInt() { return 0; }
+ }
+}
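
Two ideas in this reader are worth calling out. First, nulls: every slot consumes one definition level, and a value is only read when that level reaches the column's maximum definition level; otherwise the slot is marked null and the batch-wide noNulls/isRepeating flags are cleared (the pattern repeated in readIntegers, readLongs, and the other readXxx methods). Second, INT96 timestamps: as decodeDictionaryIds shows, each 12-byte value is a little-endian pair of nanos-of-day (8 bytes) and Julian day (4 bytes) that NanoTimeUtils turns into a java.sql.Timestamp. The sketch below is a simplified, self-contained illustration under the assumption that 2440588 is the Julian Day Number of the Unix epoch (the constant commonly used for this conversion); the real code defers to NanoTimeUtils.getTimestamp(nt, skipTimestampConversion), which is also where the getCalendar() change earlier in the patch comes into play.

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.sql.Timestamp;

/** Illustrative sketches of two pieces of the reader above; names and constants are not Hive APIs. */
final class ColumnReaderSketch {

  /** Null handling: one definition level is consumed per slot, exactly as readIntegers(...) does. */
  static void readLongsWithNulls(int[] definitionLevels, long[] encodedValues, int maxDefLevel,
                                 long[] vector, boolean[] isNull) {
    int valueIndex = 0;
    for (int row = 0; row < definitionLevels.length; row++) {
      if (definitionLevels[row] >= maxDefLevel) {
        vector[row] = encodedValues[valueIndex++];   // value is present in the page
        isNull[row] = false;
      } else {
        isNull[row] = true;                          // value was never written: null slot
      }
    }
    // A real ColumnVector would also have its noNulls and isRepeating flags updated here.
  }

  /** INT96 decoding: little-endian nanos-of-day (8 bytes) followed by Julian day (4 bytes). */
  static Timestamp decodeInt96(byte[] twelveBytes) {
    ByteBuffer buf = ByteBuffer.wrap(twelveBytes).order(ByteOrder.LITTLE_ENDIAN);
    long timeOfDayNanos = buf.getLong();
    int julianDay = buf.getInt();

    final long JULIAN_DAY_OF_EPOCH = 2440588L;       // assumed Julian Day Number of 1970-01-01
    long epochSeconds = (julianDay - JULIAN_DAY_OF_EPOCH) * 86400L + timeOfDayNanos / 1_000_000_000L;
    Timestamp ts = new Timestamp(epochSeconds * 1000L);
    ts.setNanos((int) (timeOfDayNanos % 1_000_000_000L));
    return ts;
  }
}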