You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/03/03 11:50:53 UTC
[1/7] hive git commit: HIVE-18819: Vectorization: Optimize IF
statement expression evaluation of THEN/ELSE (Matt McCline,
reviewed by Teddy Choi)
Repository: hive
Updated Branches:
refs/heads/master 1a3090f85 -> 53980ba66
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/vector_case_when_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/vector_case_when_2.q.out
new file mode 100644
index 0000000..7b09638
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_case_when_2.q.out
@@ -0,0 +1,806 @@
+PREHOOK: query: create table timestamps_txt (tsval timestamp) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@timestamps_txt
+POSTHOOK: query: create table timestamps_txt (tsval timestamp) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@timestamps_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/timestamps.txt' OVERWRITE INTO TABLE timestamps_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@timestamps_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/timestamps.txt' OVERWRITE INTO TABLE timestamps_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@timestamps_txt
+PREHOOK: query: create table timestamps (cdate date, ctimestamp1 timestamp, stimestamp1 string, ctimestamp2 timestamp) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: create table timestamps (cdate date, ctimestamp1 timestamp, stimestamp1 string, ctimestamp2 timestamp) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@timestamps
+PREHOOK: query: insert overwrite table timestamps
+ select cast(tsval as date), tsval, cast(tsval as string), tsval - '1 2:3:4' day to second from timestamps_txt
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps_txt
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: insert overwrite table timestamps
+ select cast(tsval as date), tsval, cast(tsval as string), tsval - '1 2:3:4' day to second from timestamps_txt
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps_txt
+POSTHOOK: Output: default@timestamps
+POSTHOOK: Lineage: timestamps.cdate EXPRESSION [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: timestamps.ctimestamp1 SIMPLE [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: timestamps.ctimestamp2 EXPRESSION [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: timestamps.stimestamp1 EXPRESSION [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ]
+tsval tsval _c2 _c3
+PREHOOK: query: INSERT INTO TABLE timestamps VALUES (NULL,NULL,NULL,NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: INSERT INTO TABLE timestamps VALUES (NULL,NULL,NULL,NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@timestamps
+POSTHOOK: Lineage: timestamps.cdate EXPRESSION []
+POSTHOOK: Lineage: timestamps.ctimestamp1 EXPRESSION []
+POSTHOOK: Lineage: timestamps.ctimestamp2 EXPRESSION []
+POSTHOOK: Lineage: timestamps.stimestamp1 EXPRESSION []
+_col0 _col1 _col2 _col3
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: timestamps
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00.0')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00.0')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimesta
mp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN (null) ELSE (null) END (type: string), if((TIMESTAMP'1974-10-04 17:21:03.989' > ctimestamp1), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
+ sort order: +++
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: SELECT operator: Unexpected primitive type category VOID
+ vectorized: false
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+ctimestamp1 ctimestamp2 ctimestamp2_description ctimestamp2_description_2 ctimestamp2_description_3 field1 field_2 field_3 field_4 field_5
+NULL NULL Unknown NULL NULL NULL 2018-03-08 23:04:59 NULL NULL NULL
+0004-09-22 18:26:29.519542222 0004-09-21 16:23:25.519542222 1800s or Earlier Old Old 4 0004-09-22 18:26:29.519542222 26 NULL 0005-09-22
+0528-10-27 08:15:18.941718273 0528-10-26 06:12:14.941718273 1800s or Earlier Old Old 528 2018-03-08 23:04:59 15 NULL 0529-10-27
+1319-02-02 16:31:57.778 1319-02-01 14:28:53.778 1800s or Earlier Old Old 1319 1319-02-02 16:31:57.778 31 NULL 1320-02-02
+1404-07-23 15:32:16.059185026 1404-07-22 13:29:12.059185026 1800s or Earlier Old Old 1404 2018-03-08 23:04:59 32 NULL 1405-07-23
+1815-05-06 00:12:37.543584705 1815-05-04 22:09:33.543584705 1900s Old Old 1815 2018-03-08 23:04:59 12 NULL 1816-05-05
+1883-04-17 04:14:34.647766229 1883-04-16 02:11:30.647766229 1900s Old Old 1883 2018-03-08 23:04:59 14 NULL 1884-04-16
+1966-08-16 13:36:50.183618031 1966-08-15 11:33:46.183618031 Early 2010s Old Old 1966 1966-08-16 13:36:50.183618031 36 NULL 1967-08-16
+1973-04-17 06:30:38.596784156 1973-04-16 04:27:34.596784156 Early 2010s Old Old 1973 1973-04-17 06:30:38.596784156 30 NULL 1974-04-17
+1974-10-04 17:21:03.989 1974-10-03 15:17:59.989 Early 2010s Old Old 1974 1974-10-04 17:21:03.989 21 NULL 1974-10-05
+1976-03-03 04:54:33.000895162 1976-03-02 02:51:29.000895162 Early 2010s Old Old 1976 1976-03-03 04:54:33.000895162 54 NULL 1976-03-04
+1976-05-06 00:42:30.910786948 1976-05-04 22:39:26.910786948 Early 2010s Old Old 1976 1976-05-06 00:42:30.910786948 42 NULL 1977-05-06
+1978-08-05 14:41:05.501 1978-08-04 12:38:01.501 Early 2010s Old Old 1978 1978-08-05 14:41:05.501 41 NULL 1978-08-06
+1981-04-25 09:01:12.077192689 1981-04-24 06:58:08.077192689 Early 2010s Old Old 1981 1981-04-25 09:01:12.077192689 1 NULL 1982-04-25
+1981-11-15 23:03:10.999338387 1981-11-14 21:00:06.999338387 Early 2010s Old Old 1981 1981-11-15 23:03:10.999338387 3 NULL 1981-11-16
+1985-07-20 09:30:11 1985-07-19 07:27:07 Early 2010s Old Old 1985 1985-07-20 09:30:11 30 NULL 1986-07-20
+1985-11-18 16:37:54 1985-11-17 14:34:50 Early 2010s Old Old 1985 1985-11-18 16:37:54 37 NULL 1985-11-19
+1987-02-21 19:48:29 1987-02-20 17:45:25 Early 2010s Old Old 1987 1987-02-21 19:48:29 48 NULL 1987-02-22
+1987-05-28 13:52:07.900916635 1987-05-27 11:49:03.900916635 Early 2010s Old Old 1987 1987-05-28 13:52:07.900916635 52 NULL 1988-05-27
+1998-10-16 20:05:29.397591987 1998-10-15 18:02:25.397591987 Early 2010s Old Old 1998 1998-10-16 20:05:29.397591987 5 NULL 1999-10-16
+1999-10-03 16:59:10.396903939 1999-10-02 14:56:06.396903939 Early 2010s Old Old 1999 1999-10-03 16:59:10.396903939 59 NULL 1999-10-04
+2000-12-18 08:42:30.000595596 2000-12-17 06:39:26.000595596 Early 2010s Old Old 2000 2018-03-08 23:04:59 42 NULL 2000-12-19
+2002-05-10 05:29:48.990818073 2002-05-09 03:26:44.990818073 Early 2010s Early 2000s Early 2000s 2002 2018-03-08 23:04:59 29 NULL 2002-05-11
+2003-09-23 22:33:17.00003252 2003-09-22 20:30:13.00003252 Early 2010s Early 2000s Early 2000s 2003 2018-03-08 23:04:59 33 NULL 2004-09-22
+2004-03-07 20:14:13 2004-03-06 18:11:09 Early 2010s Early 2000s Early 2000s 2004 2018-03-08 23:04:59 14 NULL 2004-03-08
+2007-02-09 05:17:29.368756876 2007-02-08 03:14:25.368756876 Late 2000s Late 2000s Late 2000s 2007 2018-03-08 23:04:59 17 NULL 2008-02-09
+2009-01-21 10:49:07.108 2009-01-20 08:46:03.108 Late 2000s Late 2000s Late 2000s 2009 2018-03-08 23:04:59 49 NULL 2009-01-22
+2010-04-08 02:43:35.861742727 2010-04-07 00:40:31.861742727 Late 2000s Late 2000s Late 2000s 2010 2018-03-08 23:04:59 43 NULL 2010-04-09
+2013-04-07 02:44:43.00086821 2013-04-06 00:41:39.00086821 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 44 NULL 2013-04-08
+2013-04-10 00:43:46.854731546 2013-04-08 22:40:42.854731546 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 43 NULL 2013-04-11
+2021-09-24 03:18:32.413655165 2021-09-23 01:15:28.413655165 Unknown NULL NULL 2021 2018-03-08 23:04:59 NULL NULL 2021-09-25
+2024-11-11 16:42:41.101 2024-11-10 14:39:37.101 Unknown NULL NULL 2024 2018-03-08 23:04:59 42 NULL 2024-11-12
+4143-07-08 10:53:27.252802259 4143-07-07 08:50:23.252802259 Unknown NULL NULL 4143 2018-03-08 23:04:59 53 NULL 4143-07-09
+4966-12-04 09:30:55.202 4966-12-03 07:27:51.202 Unknown NULL NULL 4966 2018-03-08 23:04:59 30 NULL 4966-12-05
+5339-02-01 14:10:01.085678691 5339-01-31 12:06:57.085678691 Unknown NULL NULL 5339 2018-03-08 23:04:59 10 NULL 5340-02-01
+5344-10-04 18:40:08.165 5344-10-03 16:37:04.165 Unknown NULL NULL 5344 2018-03-08 23:04:59 40 NULL 5344-10-05
+5397-07-13 07:12:32.000896438 5397-07-12 05:09:28.000896438 Unknown NULL NULL 5397 2018-03-08 23:04:59 12 12 5397-07-14
+5966-07-09 03:30:50.597 5966-07-08 01:27:46.597 Unknown NULL NULL 5966 2018-03-08 23:04:59 30 30 5967-07-09
+6229-06-28 02:54:28.970117179 6229-06-27 00:51:24.970117179 Unknown NULL NULL 6229 2018-03-08 23:04:59 54 54 6230-06-28
+6482-04-27 12:07:38.073915413 6482-04-26 10:04:34.073915413 Unknown NULL NULL 6482 2018-03-08 23:04:59 7 7 6482-04-28
+6631-11-13 16:31:29.702202248 6631-11-12 14:28:25.702202248 Unknown NULL NULL 6631 2018-03-08 23:04:59 31 31 6631-11-14
+6705-09-28 18:27:28.000845672 6705-09-27 16:24:24.000845672 Unknown NULL NULL 6705 2018-03-08 23:04:59 27 NULL 6705-09-29
+6731-02-12 08:12:48.287783702 6731-02-11 06:09:44.287783702 Unknown NULL NULL 6731 2018-03-08 23:04:59 12 NULL 6731-02-13
+7160-12-02 06:00:24.81200852 7160-12-01 03:57:20.81200852 Unknown NULL NULL 7160 2018-03-08 23:04:59 0 NULL 7161-12-02
+7409-09-07 23:33:32.459349602 7409-09-06 21:30:28.459349602 Unknown NULL NULL 7409 2018-03-08 23:04:59 33 NULL 7409-09-08
+7503-06-23 23:14:17.486 7503-06-22 21:11:13.486 Unknown NULL NULL 7503 2018-03-08 23:04:59 14 NULL 7503-06-24
+8422-07-22 03:21:45.745036084 8422-07-21 01:18:41.745036084 Unknown NULL NULL 8422 2018-03-08 23:04:59 21 NULL 8422-07-23
+8521-01-16 20:42:05.668832388 8521-01-15 18:39:01.668832388 Unknown NULL NULL 8521 2018-03-08 23:04:59 42 NULL 8521-01-17
+9075-06-13 16:20:09.218517797 9075-06-12 14:17:05.218517797 Unknown NULL NULL 9075 2018-03-08 23:04:59 20 NULL 9075-06-14
+9209-11-11 04:08:58.223768453 9209-11-10 02:05:54.223768453 Unknown NULL NULL 9209 2018-03-08 23:04:59 8 NULL 9209-11-12
+9403-01-09 18:12:33.547 9403-01-08 16:09:29.547 Unknown NULL NULL 9403 2018-03-08 23:04:59 12 NULL 9403-01-10
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: timestamps
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:cdate:date, 1:ctimestamp1:timestamp, 2:stimestamp1:string, 3:ctimestamp2:timestamp, 4:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00.0')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00.0')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimesta
mp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN (null) ELSE (null) END (type: string), if((TIMESTAMP'1974-10-04 17:21:03.989' > ctimestamp1), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 3, 10, 12, 13, 14, 11, 7, 16, 23, 2]
+ selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 9:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00.0) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00.0) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 9:string) -> 10:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timesta
mp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 12:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprColumnNull(col 8:boolean, col 9:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean, ConstantVectorExpression(val Early 2010s) -> 9:string) -> 11:string) -> 12:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 13:string)(
children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprNullNull(null, null) -> 11:string) -> 13:string) -> 11:string) -> 13:string, IfExprLongColumnLongColumn(col 5:boolean, col 6:int, col 7:int)(children: TimestampScalarGreaterTimestampColumn(val 1974-10-04 17:21:03.989, col 1:timestamp) -> 5:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 7:int) -> 14:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 5:boolean) -> 11:string, IfExprNullColumn(col 5:boolean, null, col 6)(children: TimestampColEqualTimestampSca
lar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 5:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 6:int) -> 7:int, IfExprColumnNull(col 17:boolean, col 15:int, null)(children: ColAndCol(col 15:boolean, col 16:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 16:boolean) -> 17:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 15:int) -> 16:int, IfExprLongColumnLongColumn(col 20:boolean, col 21:date, col 22:date)(children: DoubleColGreaterDoubleScalar(col 19:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 18:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 18:double) -> 19:double) -> 20:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 21:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 22:date) -> 23:date
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
+ sort order: +++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ includeColumns: [0, 1, 2, 3]
+ dataColumns: cdate:date, ctimestamp1:timestamp, stimestamp1:string, ctimestamp2:timestamp
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, double, double, bigint, bigint, bigint, bigint]
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+ctimestamp1 ctimestamp2 ctimestamp2_description ctimestamp2_description_2 ctimestamp2_description_3 field1 field_2 field_3 field_4 field_5
+NULL NULL Unknown NULL NULL NULL 2018-03-08 23:04:59 NULL NULL NULL
+0004-09-22 18:26:29.519542222 0004-09-21 16:23:25.519542222 1800s or Earlier Old Old 4 0004-09-22 18:26:29.519542222 26 NULL 0005-09-22
+0528-10-27 08:15:18.941718273 0528-10-26 06:12:14.941718273 1800s or Earlier Old Old 528 2018-03-08 23:04:59 15 NULL 0529-10-27
+1319-02-02 16:31:57.778 1319-02-01 14:28:53.778 1800s or Earlier Old Old 1319 1319-02-02 16:31:57.778 31 NULL 1320-02-02
+1404-07-23 15:32:16.059185026 1404-07-22 13:29:12.059185026 1800s or Earlier Old Old 1404 2018-03-08 23:04:59 32 NULL 1405-07-23
+1815-05-06 00:12:37.543584705 1815-05-04 22:09:33.543584705 1900s Old Old 1815 2018-03-08 23:04:59 12 NULL 1816-05-05
+1883-04-17 04:14:34.647766229 1883-04-16 02:11:30.647766229 1900s Old Old 1883 2018-03-08 23:04:59 14 NULL 1884-04-16
+1966-08-16 13:36:50.183618031 1966-08-15 11:33:46.183618031 Early 2010s Old Old 1966 1966-08-16 13:36:50.183618031 36 NULL 1967-08-16
+1973-04-17 06:30:38.596784156 1973-04-16 04:27:34.596784156 Early 2010s Old Old 1973 1973-04-17 06:30:38.596784156 30 NULL 1974-04-17
+1974-10-04 17:21:03.989 1974-10-03 15:17:59.989 Early 2010s Old Old 1974 1974-10-04 17:21:03.989 21 NULL 1974-10-05
+1976-03-03 04:54:33.000895162 1976-03-02 02:51:29.000895162 Early 2010s Old Old 1976 1976-03-03 04:54:33.000895162 54 NULL 1976-03-04
+1976-05-06 00:42:30.910786948 1976-05-04 22:39:26.910786948 Early 2010s Old Old 1976 1976-05-06 00:42:30.910786948 42 NULL 1977-05-06
+1978-08-05 14:41:05.501 1978-08-04 12:38:01.501 Early 2010s Old Old 1978 1978-08-05 14:41:05.501 41 NULL 1978-08-06
+1981-04-25 09:01:12.077192689 1981-04-24 06:58:08.077192689 Early 2010s Old Old 1981 1981-04-25 09:01:12.077192689 1 NULL 1982-04-25
+1981-11-15 23:03:10.999338387 1981-11-14 21:00:06.999338387 Early 2010s Old Old 1981 1981-11-15 23:03:10.999338387 3 NULL 1981-11-16
+1985-07-20 09:30:11 1985-07-19 07:27:07 Early 2010s Old Old 1985 1985-07-20 09:30:11 30 NULL 1986-07-20
+1985-11-18 16:37:54 1985-11-17 14:34:50 Early 2010s Old Old 1985 1985-11-18 16:37:54 37 NULL 1985-11-19
+1987-02-21 19:48:29 1987-02-20 17:45:25 Early 2010s Old Old 1987 1987-02-21 19:48:29 48 NULL 1987-02-22
+1987-05-28 13:52:07.900916635 1987-05-27 11:49:03.900916635 Early 2010s Old Old 1987 1987-05-28 13:52:07.900916635 52 NULL 1988-05-27
+1998-10-16 20:05:29.397591987 1998-10-15 18:02:25.397591987 Early 2010s Old Old 1998 1998-10-16 20:05:29.397591987 5 NULL 1999-10-16
+1999-10-03 16:59:10.396903939 1999-10-02 14:56:06.396903939 Early 2010s Old Old 1999 1999-10-03 16:59:10.396903939 59 NULL 1999-10-04
+2000-12-18 08:42:30.000595596 2000-12-17 06:39:26.000595596 Early 2010s Old Old 2000 2018-03-08 23:04:59 42 NULL 2000-12-19
+2002-05-10 05:29:48.990818073 2002-05-09 03:26:44.990818073 Early 2010s Early 2000s Early 2000s 2002 2018-03-08 23:04:59 29 NULL 2002-05-11
+2003-09-23 22:33:17.00003252 2003-09-22 20:30:13.00003252 Early 2010s Early 2000s Early 2000s 2003 2018-03-08 23:04:59 33 NULL 2004-09-22
+2004-03-07 20:14:13 2004-03-06 18:11:09 Early 2010s Early 2000s Early 2000s 2004 2018-03-08 23:04:59 14 NULL 2004-03-08
+2007-02-09 05:17:29.368756876 2007-02-08 03:14:25.368756876 Late 2000s Late 2000s Late 2000s 2007 2018-03-08 23:04:59 17 NULL 2008-02-09
+2009-01-21 10:49:07.108 2009-01-20 08:46:03.108 Late 2000s Late 2000s Late 2000s 2009 2018-03-08 23:04:59 49 NULL 2009-01-22
+2010-04-08 02:43:35.861742727 2010-04-07 00:40:31.861742727 Late 2000s Late 2000s Late 2000s 2010 2018-03-08 23:04:59 43 NULL 2010-04-09
+2013-04-07 02:44:43.00086821 2013-04-06 00:41:39.00086821 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 44 NULL 2013-04-08
+2013-04-10 00:43:46.854731546 2013-04-08 22:40:42.854731546 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 43 NULL 2013-04-11
+2021-09-24 03:18:32.413655165 2021-09-23 01:15:28.413655165 Unknown NULL NULL 2021 2018-03-08 23:04:59 NULL NULL 2021-09-25
+2024-11-11 16:42:41.101 2024-11-10 14:39:37.101 Unknown NULL NULL 2024 2018-03-08 23:04:59 42 NULL 2024-11-12
+4143-07-08 10:53:27.252802259 4143-07-07 08:50:23.252802259 Unknown NULL NULL 4143 2018-03-08 23:04:59 53 NULL 4143-07-09
+4966-12-04 09:30:55.202 4966-12-03 07:27:51.202 Unknown NULL NULL 4966 2018-03-08 23:04:59 30 NULL 4966-12-05
+5339-02-01 14:10:01.085678691 5339-01-31 12:06:57.085678691 Unknown NULL NULL 5339 2018-03-08 23:04:59 10 NULL 5340-02-01
+5344-10-04 18:40:08.165 5344-10-03 16:37:04.165 Unknown NULL NULL 5344 2018-03-08 23:04:59 40 NULL 5344-10-05
+5397-07-13 07:12:32.000896438 5397-07-12 05:09:28.000896438 Unknown NULL NULL 5397 2018-03-08 23:04:59 12 12 5397-07-14
+5966-07-09 03:30:50.597 5966-07-08 01:27:46.597 Unknown NULL NULL 5966 2018-03-08 23:04:59 30 30 5967-07-09
+6229-06-28 02:54:28.970117179 6229-06-27 00:51:24.970117179 Unknown NULL NULL 6229 2018-03-08 23:04:59 54 54 6230-06-28
+6482-04-27 12:07:38.073915413 6482-04-26 10:04:34.073915413 Unknown NULL NULL 6482 2018-03-08 23:04:59 7 7 6482-04-28
+6631-11-13 16:31:29.702202248 6631-11-12 14:28:25.702202248 Unknown NULL NULL 6631 2018-03-08 23:04:59 31 31 6631-11-14
+6705-09-28 18:27:28.000845672 6705-09-27 16:24:24.000845672 Unknown NULL NULL 6705 2018-03-08 23:04:59 27 NULL 6705-09-29
+6731-02-12 08:12:48.287783702 6731-02-11 06:09:44.287783702 Unknown NULL NULL 6731 2018-03-08 23:04:59 12 NULL 6731-02-13
+7160-12-02 06:00:24.81200852 7160-12-01 03:57:20.81200852 Unknown NULL NULL 7160 2018-03-08 23:04:59 0 NULL 7161-12-02
+7409-09-07 23:33:32.459349602 7409-09-06 21:30:28.459349602 Unknown NULL NULL 7409 2018-03-08 23:04:59 33 NULL 7409-09-08
+7503-06-23 23:14:17.486 7503-06-22 21:11:13.486 Unknown NULL NULL 7503 2018-03-08 23:04:59 14 NULL 7503-06-24
+8422-07-22 03:21:45.745036084 8422-07-21 01:18:41.745036084 Unknown NULL NULL 8422 2018-03-08 23:04:59 21 NULL 8422-07-23
+8521-01-16 20:42:05.668832388 8521-01-15 18:39:01.668832388 Unknown NULL NULL 8521 2018-03-08 23:04:59 42 NULL 8521-01-17
+9075-06-13 16:20:09.218517797 9075-06-12 14:17:05.218517797 Unknown NULL NULL 9075 2018-03-08 23:04:59 20 NULL 9075-06-14
+9209-11-11 04:08:58.223768453 9209-11-10 02:05:54.223768453 Unknown NULL NULL 9209 2018-03-08 23:04:59 8 NULL 9209-11-12
+9403-01-09 18:12:33.547 9403-01-08 16:09:29.547 Unknown NULL NULL 9403 2018-03-08 23:04:59 12 NULL 9403-01-10
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: timestamps
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:cdate:date, 1:ctimestamp1:timestamp, 2:stimestamp1:string, 3:ctimestamp2:timestamp, 4:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00.0')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00.0')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimesta
mp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN (null) ELSE (null) END (type: string), if((TIMESTAMP'1974-10-04 17:21:03.989' > ctimestamp1), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 3, 15, 26, 36, 40, 42, 44, 46, 53, 2]
+ selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00.0) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00.0) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCo
ndExpr(col 11:boolean, col 16:stringcol 25:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 11:boolean, ConstantVectorExpression(val Old) -> 16:string, IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 24:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 17:boolean, ConstantVectorExpression(val Early 2000s) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 19:boolean, ConstantVectorExpression(val Late 2000s) -> 20:string, IfExprColumnNull(col 21:boolean, col 22:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 21:boolean, ConstantVectorExpression(val Early 2010s) -> 22:string) -> 23:string) -> 24:string) -> 25:string) -> 26:string, IfExprColumnCondExpr(
col 27:boolean, col 28:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 27:boolean, ConstantVectorExpression(val Old) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 29:boolean, ConstantVectorExpression(val Early 2000s) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 33:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 31:boolean, ConstantVectorExpression(val Late 2000s) -> 32:string, IfExprNullNull(null, null) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampScalarGreaterTimestampColumn(val 1974-10-04 17:21:03.989, col 1:timestamp) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) ->
38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 41:boolean, null, col 43:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 41:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 43:int) -> 44:int, IfExprCondExprNull(col 47:boolean, col 45:int, null)(children: ColAndCol(col 45:boolean, col 46:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 45:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 46:boolean) -> 47:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 45:int) -> 46:int, IfExprCondExprCondExpr(col 50:boolean, col 51:datecol 52:date)
(children: DoubleColGreaterDoubleScalar(col 49:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 48:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 48:double) -> 49:double) -> 50:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 51:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 52:date) -> 53:date
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
+ sort order: +++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ includeColumns: [0, 1, 2, 3]
+ dataColumns: cdate:date, ctimestamp1:timestamp, stimestamp1:string, ctimestamp2:timestamp
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, bigint, bigint]
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+ctimestamp1 ctimestamp2 ctimestamp2_description ctimestamp2_description_2 ctimestamp2_description_3 field1 field_2 field_3 field_4 field_5
+NULL NULL Unknown NULL NULL NULL 2018-03-08 23:04:59 NULL NULL NULL
+0004-09-22 18:26:29.519542222 0004-09-21 16:23:25.519542222 1800s or Earlier Old Old 4 0004-09-22 18:26:29.519542222 26 NULL 0005-09-22
+0528-10-27 08:15:18.941718273 0528-10-26 06:12:14.941718273 1800s or Earlier Old Old 528 2018-03-08 23:04:59 15 NULL 0529-10-27
+1319-02-02 16:31:57.778 1319-02-01 14:28:53.778 1800s or Earlier Old Old 1319 1319-02-02 16:31:57.778 31 NULL 1320-02-02
+1404-07-23 15:32:16.059185026 1404-07-22 13:29:12.059185026 1800s or Earlier Old Old 1404 2018-03-08 23:04:59 32 NULL 1405-07-23
+1815-05-06 00:12:37.543584705 1815-05-04 22:09:33.543584705 1900s Old Old 1815 2018-03-08 23:04:59 12 NULL 1816-05-05
+1883-04-17 04:14:34.647766229 1883-04-16 02:11:30.647766229 1900s Old Old 1883 2018-03-08 23:04:59 14 NULL 1884-04-16
+1966-08-16 13:36:50.183618031 1966-08-15 11:33:46.183618031 Early 2010s Old Old 1966 1966-08-16 13:36:50.183618031 36 NULL 1967-08-16
+1973-04-17 06:30:38.596784156 1973-04-16 04:27:34.596784156 Early 2010s Old Old 1973 1973-04-17 06:30:38.596784156 30 NULL 1974-04-17
+1974-10-04 17:21:03.989 1974-10-03 15:17:59.989 Early 2010s Old Old 1974 1974-10-04 17:21:03.989 21 NULL 1974-10-05
+1976-03-03 04:54:33.000895162 1976-03-02 02:51:29.000895162 Early 2010s Old Old 1976 1976-03-03 04:54:33.000895162 54 NULL 1976-03-04
+1976-05-06 00:42:30.910786948 1976-05-04 22:39:26.910786948 Early 2010s Old Old 1976 1976-05-06 00:42:30.910786948 42 NULL 1977-05-06
+1978-08-05 14:41:05.501 1978-08-04 12:38:01.501 Early 2010s Old Old 1978 1978-08-05 14:41:05.501 41 NULL 1978-08-06
+1981-04-25 09:01:12.077192689 1981-04-24 06:58:08.077192689 Early 2010s Old Old 1981 1981-04-25 09:01:12.077192689 1 NULL 1982-04-25
+1981-11-15 23:03:10.999338387 1981-11-14 21:00:06.999338387 Early 2010s Old Old 1981 1981-11-15 23:03:10.999338387 3 NULL 1981-11-16
+1985-07-20 09:30:11 1985-07-19 07:27:07 Early 2010s Old Old 1985 1985-07-20 09:30:11 30 NULL 1986-07-20
+1985-11-18 16:37:54 1985-11-17 14:34:50 Early 2010s Old Old 1985 1985-11-18 16:37:54 37 NULL 1985-11-19
+1987-02-21 19:48:29 1987-02-20 17:45:25 Early 2010s Old Old 1987 1987-02-21 19:48:29 48 NULL 1987-02-22
+1987-05-28 13:52:07.900916635 1987-05-27 11:49:03.900916635 Early 2010s Old Old 1987 1987-05-28 13:52:07.900916635 52 NULL 1988-05-27
+1998-10-16 20:05:29.397591987 1998-10-15 18:02:25.397591987 Early 2010s Old Old 1998 1998-10-16 20:05:29.397591987 5 NULL 1999-10-16
+1999-10-03 16:59:10.396903939 1999-10-02 14:56:06.396903939 Early 2010s Old Old 1999 1999-10-03 16:59:10.396903939 59 NULL 1999-10-04
+2000-12-18 08:42:30.000595596 2000-12-17 06:39:26.000595596 Early 2010s Old Old 2000 2018-03-08 23:04:59 42 NULL 2000-12-19
+2002-05-10 05:29:48.990818073 2002-05-09 03:26:44.990818073 Early 2010s Early 2000s Early 2000s 2002 2018-03-08 23:04:59 29 NULL 2002-05-11
+2003-09-23 22:33:17.00003252 2003-09-22 20:30:13.00003252 Early 2010s Early 2000s Early 2000s 2003 2018-03-08 23:04:59 33 NULL 2004-09-22
+2004-03-07 20:14:13 2004-03-06 18:11:09 Early 2010s Early 2000s Early 2000s 2004 2018-03-08 23:04:59 14 NULL 2004-03-08
+2007-02-09 05:17:29.368756876 2007-02-08 03:14:25.368756876 Late 2000s Late 2000s Late 2000s 2007 2018-03-08 23:04:59 17 NULL 2008-02-09
+2009-01-21 10:49:07.108 2009-01-20 08:46:03.108 Late 2000s Late 2000s Late 2000s 2009 2018-03-08 23:04:59 49 NULL 2009-01-22
+2010-04-08 02:43:35.861742727 2010-04-07 00:40:31.861742727 Late 2000s Late 2000s Late 2000s 2010 2018-03-08 23:04:59 43 NULL 2010-04-09
+2013-04-07 02:44:43.00086821 2013-04-06 00:41:39.00086821 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 44 NULL 2013-04-08
+2013-04-10 00:43:46.854731546 2013-04-08 22:40:42.854731546 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 43 NULL 2013-04-11
+2021-09-24 03:18:32.413655165 2021-09-23 01:15:28.413655165 Unknown NULL NULL 2021 2018-03-08 23:04:59 NULL NULL 2021-09-25
+2024-11-11 16:42:41.101 2024-11-10 14:39:37.101 Unknown NULL NULL 2024 2018-03-08 23:04:59 42 NULL 2024-11-12
+4143-07-08 10:53:27.252802259 4143-07-07 08:50:23.252802259 Unknown NULL NULL 4143 2018-03-08 23:04:59 53 NULL 4143-07-09
+4966-12-04 09:30:55.202 4966-12-03 07:27:51.202 Unknown NULL NULL 4966 2018-03-08 23:04:59 30 NULL 4966-12-05
+5339-02-01 14:10:01.085678691 5339-01-31 12:06:57.085678691 Unknown NULL NULL 5339 2018-03-08 23:04:59 10 NULL 5340-02-01
+5344-10-04 18:40:08.165 5344-10-03 16:37:04.165 Unknown NULL NULL 5344 2018-03-08 23:04:59 40 NULL 5344-10-05
+5397-07-13 07:12:32.000896438 5397-07-12 05:09:28.000896438 Unknown NULL NULL 5397 2018-03-08 23:04:59 12 12 5397-07-14
+5966-07-09 03:30:50.597 5966-07-08 01:27:46.597 Unknown NULL NULL 5966 2018-03-08 23:04:59 30 30 5967-07-09
+6229-06-28 02:54:28.970117179 6229-06-27 00:51:24.970117179 Unknown NULL NULL 6229 2018-03-08 23:04:59 54 54 6230-06-28
+6482-04-27 12:07:38.073915413 6482-04-26 10:04:34.073915413 Unknown NULL NULL 6482 2018-03-08 23:04:59 7 7 6482-04-28
+6631-11-13 16:31:29.702202248 6631-11-12 14:28:25.702202248 Unknown NULL NULL 6631 2018-03-08 23:04:59 31 31 6631-11-14
+6705-09-28 18:27:28.000845672 6705-09-27 16:24:24.000845672 Unknown NULL NULL 6705 2018-03-08 23:04:59 27 NULL 6705-09-29
+6731-02-12 08:12:48.287783702 6731-02-11 06:09:44.287783702 Unknown NULL NULL 6731 2018-03-08 23:04:59 12 NULL 6731-02-13
+7160-12-02 06:00:24.81200852 7160-12-01 03:57:20.81200852 Unknown NULL NULL 7160 2018-03-08 23:04:59 0 NULL 7161-12-02
+7409-09-07 23:33:32.459349602 7409-09-06 21:30:28.459349602 Unknown NULL NULL 7409 2018-03-08 23:04:59 33 NULL 7409-09-08
+7503-06-23 23:14:17.486 7503-06-22 21:11:13.486 Unknown NULL NULL 7503 2018-03-08 23:04:59 14 NULL 7503-06-24
+8422-07-22 03:21:45.745036084 8422-07-21 01:18:41.745036084 Unknown NULL NULL 8422 2018-03-08 23:04:59 21 NULL 8422-07-23
+8521-01-16 20:42:05.668832388 8521-01-15 18:39:01.668832388 Unknown NULL NULL 8521 2018-03-08 23:04:59 42 NULL 8521-01-17
+9075-06-13 16:20:09.218517797 9075-06-12 14:17:05.218517797 Unknown NULL NULL 9075 2018-03-08 23:04:59 20 NULL 9075-06-14
+9209-11-11 04:08:58.223768453 9209-11-10 02:05:54.223768453 Unknown NULL NULL 9209 2018-03-08 23:04:59 8 NULL 9209-11-12
+9403-01-09 18:12:33.547 9403-01-08 16:09:29.547 Unknown NULL NULL 9403 2018-03-08 23:04:59 12 NULL 9403-01-10
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/vector_when_case_null.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_when_case_null.q.out b/ql/src/test/results/clientpositive/vector_when_case_null.q.out
index d7cc0b7..13fb6d1 100644
--- a/ql/src/test/results/clientpositive/vector_when_case_null.q.out
+++ b/ql/src/test/results/clientpositive/vector_when_case_null.q.out
@@ -45,13 +45,13 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 6]
- selectExpressions: IfExprLongScalarLongColumn(col 1:boolean, val 1, col 5:int)(children: IfExprColumnNull(col 3:boolean, col 4:int, null)(children: NotCol(col 1:boolean) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:int) -> 5:int) -> 6:int
+ projectedOutputColumnNums: [0, 7]
+ selectExpressions: IfExprColumnCondExpr(col 1:boolean, col 3:intcol 6:int)(children: col 1:boolean, ConstantVectorExpression(val 1) -> 3:int, IfExprColumnNull(col 4:boolean, col 5:int, null)(children: NotCol(col 1:boolean) -> 4:boolean, ConstantVectorExpression(val 0) -> 5:int) -> 6:int) -> 7:int
Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col1)
Group By Vectorization:
- aggregators: VectorUDAFCount(col 6:int) -> bigint
+ aggregators: VectorUDAFCount(col 7:int) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 0:string
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
index 428781f..74b4426 100644
--- a/ql/src/test/results/clientpositive/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
@@ -65,8 +65,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [1, 16, 17]
- selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 16:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 17:string
+ projectedOutputColumnNums: [1, 17, 21]
+ selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -93,7 +93,7 @@ STAGE PLANS:
includeColumns: [1]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, bigint, string, string, string]
+ scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, string]
Stage: Stage-0
Fetch Operator
@@ -210,8 +210,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [1, 17, 20]
- selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprColumnNull(col 14:boolean, col 15:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean, ConstantVectorExpression(val b) -> 15:string) -> 16:string) -> 17:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 19:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprNullColumn(col 18:boolean, null, col 16)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 18:boolean, ConstantVectorExpression(val c) -> 16:string) -> 19:string) -> 20:string
+ projectedOutputColumnNums: [1, 18, 24]
+ selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -238,7 +238,7 @@ STAGE PLANS:
includeColumns: [1]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string]
+ scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, bigint, string, string, string]
Stage: Stage-0
Fetch Operator
@@ -525,7 +525,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [6]
- selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
+ selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:decimal(11,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -601,8 +601,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5]
- selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0)
+ projectedOutputColumnNums: [6]
+ selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:decimal(1,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:decimal(1,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -629,7 +629,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: member:decimal(10,0), attr:decimal(10,0)
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)]
+ scratchColumnTypeNames: [bigint, decimal(1,0), decimal(11,0), decimal(11,0)]
Stage: Stage-0
Fetch Operator
@@ -678,8 +678,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5]
- selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0)
+ projectedOutputColumnNums: [6]
+ selectExpressions: IfExprCondExprColumn(col 3:boolean, col 4:decimal(11,0), col 5:decimal(1,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), ConstantVectorExpression(val 2) -> 5:decimal(1,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -706,7 +706,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: member:decimal(10,0), attr:decimal(10,0)
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)]
+ scratchColumnTypeNames: [bigint, decimal(11,0), decimal(1,0), decimal(11,0)]
Stage: Stage-0
Fetch Operator
@@ -774,7 +774,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [6]
- selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint
+ selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:bigintcol 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -851,7 +851,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [5]
- selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint
+ selectExpressions: IfExprNullCondExpr(col 3:boolean, null, col 4:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -928,7 +928,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [5]
- selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint
+ selectExpressions: IfExprCondExprNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
[4/7] hive git commit: HIVE-18819: Vectorization: Optimize IF
statement expression evaluation of THEN/ELSE (Matt McCline,
reviewed by Teddy Choi)
Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out b/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out
index 7402667..4e36f37 100644
--- a/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: create table student_2_lines(
+PREHOOK: query: create table student_10_lines_txt(
name string,
age int,
gpa double)
@@ -7,8 +7,8 @@ fields terminated by '\001'
stored as textfile
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
-PREHOOK: Output: default@student_2_lines
-POSTHOOK: query: create table student_2_lines(
+PREHOOK: Output: default@student_10_lines_txt
+POSTHOOK: query: create table student_10_lines_txt(
name string,
age int,
gpa double)
@@ -17,45 +17,431 @@ fields terminated by '\001'
stored as textfile
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
-POSTHOOK: Output: default@student_2_lines
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines
+POSTHOOK: Output: default@student_10_lines_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_10_lines' OVERWRITE INTO TABLE student_10_lines_txt
PREHOOK: type: LOAD
#### A masked pattern was here ####
-PREHOOK: Output: default@student_2_lines
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines
+PREHOOK: Output: default@student_10_lines_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_10_lines' OVERWRITE INTO TABLE student_10_lines_txt
POSTHOOK: type: LOAD
#### A masked pattern was here ####
-POSTHOOK: Output: default@student_2_lines
-PREHOOK: query: analyze table student_2_lines compute statistics
+POSTHOOK: Output: default@student_10_lines_txt
+PREHOOK: query: CREATE TABLE student_10_lines STORED AS ORC AS SELECT * FROM student_10_lines_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@student_10_lines_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@student_10_lines
+POSTHOOK: query: CREATE TABLE student_10_lines STORED AS ORC AS SELECT * FROM student_10_lines_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@student_10_lines_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@student_10_lines
+POSTHOOK: Lineage: student_10_lines.age SIMPLE [(student_10_lines_txt)student_10_lines_txt.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: student_10_lines.gpa SIMPLE [(student_10_lines_txt)student_10_lines_txt.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: student_10_lines.name SIMPLE [(student_10_lines_txt)student_10_lines_txt.FieldSchema(name:name, type:string, comment:null), ]
+student_10_lines_txt.name student_10_lines_txt.age student_10_lines_txt.gpa
+PREHOOK: query: INSERT INTO TABLE student_10_lines VALUES (NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@student_10_lines
+POSTHOOK: query: INSERT INTO TABLE student_10_lines VALUES (NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@student_10_lines
+POSTHOOK: Lineage: student_10_lines.age EXPRESSION []
+POSTHOOK: Lineage: student_10_lines.gpa EXPRESSION []
+POSTHOOK: Lineage: student_10_lines.name EXPRESSION []
+_col0 _col1 _col2
+PREHOOK: query: INSERT INTO TABLE student_10_lines VALUES ("George", 22, 3.8)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@student_10_lines
+POSTHOOK: query: INSERT INTO TABLE student_10_lines VALUES ("George", 22, 3.8)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@student_10_lines
+POSTHOOK: Lineage: student_10_lines.age SCRIPT []
+POSTHOOK: Lineage: student_10_lines.gpa SCRIPT []
+POSTHOOK: Lineage: student_10_lines.name SCRIPT []
+_col0 _col1 _col2
+PREHOOK: query: analyze table student_10_lines compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@student_10_lines
+POSTHOOK: query: analyze table student_10_lines compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@student_10_lines
+student_10_lines.name student_10_lines.age student_10_lines.gpa
+PREHOOK: query: create table insert_a_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_a_adaptor
+POSTHOOK: query: create table insert_a_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_a_adaptor
+PREHOOK: query: explain vectorization detail
+insert overwrite table insert_a_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+insert overwrite table insert_a_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: student_10_lines
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), age, null) (type: int), if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null) (type: timestamp), if((length(name) > 8), name, null) (type: string), if((length(name) < 8), CAST( name AS BINARY), null) (type: binary), if((age > 40), length(name), null) (type: int), if((length(name) > 10), (2.0D * gpa), null) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_a_adaptor
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: SELECT operator: Unexpected primitive type category VOID
+ vectorized: false
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_a_adaptor
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table insert_a_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@insert_a_adaptor
+POSTHOOK: query: insert overwrite table insert_a_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@insert_a_adaptor
+POSTHOOK: Lineage: insert_a_adaptor.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+name age gpa _c3 _c4 _c5 _c6 _c7 _c8
+PREHOOK: query: select * from insert_a_adaptor
+PREHOOK: type: QUERY
+PREHOOK: Input: default@insert_a_adaptor
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_a_adaptor
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@insert_a_adaptor
+#### A masked pattern was here ####
+insert_a_adaptor.name insert_a_adaptor.age insert_a_adaptor.gpa insert_a_adaptor.a insert_a_adaptor.b insert_a_adaptor.c insert_a_adaptor.d insert_a_adaptor.e insert_a_adaptor.f
+George 22 3.8 22 NULL NULL George NULL NULL
+NULL NULL NULL NULL NULL NULL NULL NULL NULL
+calvin brown 28 2.7 28 NULL calvin brown NULL NULL 5.4
+luke brown 60 1.14 NULL 2011-01-01 01:01:01 luke brown NULL 10 NULL
+luke king 28 0.47 28 NULL luke king NULL NULL NULL
+nick johnson 34 NULL 34 NULL nick johnson NULL NULL NULL
+oscar thompson 35 2.98 35 NULL oscar thompson NULL NULL 5.96
+priscilla falkner 55 1.16 NULL 2011-01-01 01:01:01 priscilla falkner NULL 17 2.32
+quinn ovid 19 NULL 19 NULL quinn ovid NULL NULL NULL
+tom thompson 42 0.53 NULL 2011-01-01 01:01:01 tom thompson NULL 12 1.06
+ulysses garcia 35 2.74 35 NULL ulysses garcia NULL NULL 5.48
+xavier garcia 33 1.06 33 NULL xavier garcia NULL NULL 2.12
+PREHOOK: query: create table insert_a_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_a_good
+POSTHOOK: query: create table insert_a_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_a_good
+PREHOOK: query: explain vectorization detail
+insert overwrite table insert_a_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+insert overwrite table insert_a_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: student_10_lines
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), age, null) (type: int), if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null) (type: timestamp), if((length(name) > 8), name, null) (type: string), if((length(name) < 8), CAST( name AS BINARY), null) (type: binary), if((age > 40), length(name), null) (type: int), if((length(name) > 10), (2.0D * gpa), null) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20]
+ selectExpressions: IfExprColumnNull(col 4:boolean, col 1:int, null)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprColumnNull(col 6:boolean, col 7:timestamp, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprColumnNull(col 10:boolean, col 0:string, null)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprColumnNull(col 12:boolean, col 13:binary, null)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprColumnNull(col 9:boolean, col 15:int, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprColumnNull(col 18:boolean, col 19:double, null)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_a_good
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: name:string, age:int, gpa:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double]
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_a_good
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table insert_a_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@insert_a_good
+POSTHOOK: query: insert overwrite table insert_a_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@insert_a_good
+POSTHOOK: Lineage: insert_a_good.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+name age gpa _c3 _c4 _c5 _c6 _c7 _c8
+PREHOOK: query: select * from insert_a_good
PREHOOK: type: QUERY
-PREHOOK: Input: default@student_2_lines
-PREHOOK: Output: default@student_2_lines
-POSTHOOK: query: analyze table student_2_lines compute statistics
+PREHOOK: Input: default@insert_a_good
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_a_good
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@student_2_lines
-POSTHOOK: Output: default@student_2_lines
-PREHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary)
+POSTHOOK: Input: default@insert_a_good
+#### A masked pattern was here ####
+insert_a_good.name insert_a_good.age insert_a_good.gpa insert_a_good.a insert_a_good.b insert_a_good.c insert_a_good.d insert_a_good.e insert_a_good.f
+George 22 3.8 22 NULL NULL George NULL NULL
+NULL NULL NULL NULL NULL NULL NULL NULL NULL
+calvin brown 28 2.7 28 NULL calvin brown NULL NULL 5.4
+luke brown 60 1.14 NULL 2011-01-01 01:01:01 luke brown NULL 10 NULL
+luke king 28 0.47 28 NULL luke king NULL NULL NULL
+nick johnson 34 NULL 34 NULL nick johnson NULL NULL NULL
+oscar thompson 35 2.98 35 NULL oscar thompson NULL NULL 5.96
+priscilla falkner 55 1.16 NULL 2011-01-01 01:01:01 priscilla falkner NULL 17 2.32
+quinn ovid 19 NULL 19 NULL quinn ovid NULL NULL NULL
+tom thompson 42 0.53 NULL 2011-01-01 01:01:01 tom thompson NULL 12 1.06
+ulysses garcia 35 2.74 35 NULL ulysses garcia NULL NULL 5.48
+xavier garcia 33 1.06 33 NULL xavier garcia NULL NULL 2.12
+PREHOOK: query: create table insert_a_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
-PREHOOK: Output: default@insert_10_1
-POSTHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary)
+PREHOOK: Output: default@insert_a_better
+POSTHOOK: query: create table insert_a_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
-POSTHOOK: Output: default@insert_10_1
+POSTHOOK: Output: default@insert_a_better
PREHOOK: query: explain vectorization detail
-insert overwrite table insert_10_1
- select cast(gpa as float),
- age,
- IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
- IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
+insert overwrite table insert_a_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
PREHOOK: type: QUERY
POSTHOOK: query: explain vectorization detail
-insert overwrite table insert_10_1
- select cast(gpa as float),
- age,
- IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
- IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
+insert overwrite table insert_a_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
POSTHOOK: type: QUERY
+Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
@@ -74,40 +460,39 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: student_2_lines
- Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ alias: student_10_lines
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Select Operator
- expressions: UDFToFloat(gpa) (type: float), age (type: int), if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null) (type: timestamp), if((length(name) > 10), CAST( name AS BINARY), null) (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), age, null) (type: int), if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null) (type: timestamp), if((length(name) > 8), name, null) (type: string), if((length(name) < 8), CAST( name AS BINARY), null) (type: binary), if((age > 40), length(name), null) (type: int), if((length(name) > 10), (2.0D * gpa), null) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [2, 1, 5, 8]
- selectExpressions: VectorUDFAdaptor(if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null))(children: LongColGreaterLongScalar(col 1:int, val 40) -> 4:boolean) -> 5:timestamp, VectorUDFAdaptor(if((length(name) > 10), CAST( name AS BINARY), null))(children: LongColGreaterLongScalar(col 4:int, val 10)(children: StringLength(col 0:string) -> 4:int) -> 6:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 7:binary) -> 8:binary
- Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20]
+ selectExpressions: IfExprColumnNull(col 4:boolean, col 1:int, null)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprColumnNull(col 6:boolean, col 7:timestamp, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprColumnNull(col 10:boolean, col 0:string, null)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprCondExprNull(col 12:boolean, col 13:binary, null)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprCondExprNull(col 9:boolean, col 15:int, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprCondExprNull(col 18:boolean, col 19:double, null)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_10_1
+ name: default.insert_a_better
Execution mode: vectorized, llap
- LLAP IO: no inputs
+ LLAP IO: all inputs
Map Vectorization:
enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
usesVectorUDFAdaptor: true
vectorized: true
@@ -116,7 +501,7 @@ STAGE PLANS:
includeColumns: [0, 1, 2]
dataColumns: name:string, age:int, gpa:double
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, timestamp, bigint, string, string]
+ scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double]
Stage: Stage-2
Dependency Collection
@@ -129,29 +514,578 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_10_1
+ name: default.insert_a_better
Stage: Stage-3
Stats Work
Basic Stats Work:
-PREHOOK: query: insert overwrite table insert_10_1
- select cast(gpa as float),
- age,
- IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
- IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
-PREHOOK: type: QUERY
-PREHOOK: Input: default@student_2_lines
-PREHOOK: Output: default@insert_10_1
-POSTHOOK: query: insert overwrite table insert_10_1
- select cast(gpa as float),
- age,
- IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
- IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@student_2_lines
-POSTHOOK: Output: default@insert_10_1
-POSTHOOK: Lineage: insert_10_1.a EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:gpa, type:double, comment:null), ]
-POSTHOOK: Lineage: insert_10_1.b SIMPLE [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ]
-POSTHOOK: Lineage: insert_10_1.c EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ]
-POSTHOOK: Lineage: insert_10_1.d EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:name, type:string, comment:null), ]
+PREHOOK: query: insert overwrite table insert_a_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@insert_a_better
+POSTHOOK: query: insert overwrite table insert_a_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@insert_a_better
+POSTHOOK: Lineage: insert_a_better.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+name age gpa _c3 _c4 _c5 _c6 _c7 _c8
+PREHOOK: query: select * from insert_a_better
+PREHOOK: type: QUERY
+PREHOOK: Input: default@insert_a_better
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_a_better
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@insert_a_better
+#### A masked pattern was here ####
+insert_a_better.name insert_a_better.age insert_a_better.gpa insert_a_better.a insert_a_better.b insert_a_better.c insert_a_better.d insert_a_better.e insert_a_better.f
+George 22 3.8 22 NULL NULL George NULL NULL
+NULL NULL NULL NULL NULL NULL NULL NULL NULL
+calvin brown 28 2.7 28 NULL calvin brown NULL NULL 5.4
+luke brown 60 1.14 NULL 2011-01-01 01:01:01 luke brown NULL 10 NULL
+luke king 28 0.47 28 NULL luke king NULL NULL NULL
+nick johnson 34 NULL 34 NULL nick johnson NULL NULL NULL
+oscar thompson 35 2.98 35 NULL oscar thompson NULL NULL 5.96
+priscilla falkner 55 1.16 NULL 2011-01-01 01:01:01 priscilla falkner NULL 17 2.32
+quinn ovid 19 NULL 19 NULL quinn ovid NULL NULL NULL
+tom thompson 42 0.53 NULL 2011-01-01 01:01:01 tom thompson NULL 12 1.06
+ulysses garcia 35 2.74 35 NULL ulysses garcia NULL NULL 5.48
+xavier garcia 33 1.06 33 NULL xavier garcia NULL NULL 2.12
+PREHOOK: query: create table insert_b_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_b_adaptor
+POSTHOOK: query: create table insert_b_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_b_adaptor
+PREHOOK: query: explain vectorization detail
+insert overwrite table insert_b_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+insert overwrite table insert_b_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: student_10_lines
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), null, age) (type: int), if((age > 40), null, TIMESTAMP'2011-01-01 01:01:01.0') (type: timestamp), if((length(name) > 8), null, name) (type: string), if((length(name) < 8), null, CAST( name AS BINARY)) (type: binary), if((age > 40), null, length(name)) (type: int), if((length(name) > 10), null, (2.0D * gpa)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_b_adaptor
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: SELECT operator: Unexpected primitive type category VOID
+ vectorized: false
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_b_adaptor
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table insert_b_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@insert_b_adaptor
+POSTHOOK: query: insert overwrite table insert_b_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@insert_b_adaptor
+POSTHOOK: Lineage: insert_b_adaptor.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+name age gpa _c3 _c4 _c5 _c6 _c7 _c8
+PREHOOK: query: select * from insert_b_adaptor
+PREHOOK: type: QUERY
+PREHOOK: Input: default@insert_b_adaptor
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_b_adaptor
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@insert_b_adaptor
+#### A masked pattern was here ####
+insert_b_adaptor.name insert_b_adaptor.age insert_b_adaptor.gpa insert_b_adaptor.a insert_b_adaptor.b insert_b_adaptor.c insert_b_adaptor.d insert_b_adaptor.e insert_b_adaptor.f
+George 22 3.8 NULL 2011-01-01 01:01:01 George NULL 6 7.6
+NULL NULL NULL NULL 2011-01-01 01:01:01 NULL NULL NULL NULL
+calvin brown 28 2.7 NULL 2011-01-01 01:01:01 NULL calvin brown 12 NULL
+luke brown 60 1.14 60 NULL NULL luke brown NULL 2.28
+luke king 28 0.47 NULL 2011-01-01 01:01:01 NULL luke king 9 0.94
+nick johnson 34 NULL NULL 2011-01-01 01:01:01 NULL nick johnson 12 NULL
+oscar thompson 35 2.98 NULL 2011-01-01 01:01:01 NULL oscar thompson 14 NULL
+priscilla falkner 55 1.16 55 NULL NULL priscilla falkner NULL NULL
+quinn ovid 19 NULL NULL 2011-01-01 01:01:01 NULL quinn ovid 10 NULL
+tom thompson 42 0.53 42 NULL NULL tom thompson NULL NULL
+ulysses garcia 35 2.74 NULL 2011-01-01 01:01:01 NULL ulysses garcia 14 NULL
+xavier garcia 33 1.06 NULL 2011-01-01 01:01:01 NULL xavier garcia 13 NULL
+PREHOOK: query: create table insert_b_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_b_good
+POSTHOOK: query: create table insert_b_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_b_good
+PREHOOK: query: explain vectorization detail
+insert overwrite table insert_b_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+insert overwrite table insert_b_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: student_10_lines
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), null, age) (type: int), if((age > 40), null, TIMESTAMP'2011-01-01 01:01:01.0') (type: timestamp), if((length(name) > 8), null, name) (type: string), if((length(name) < 8), null, CAST( name AS BINARY)) (type: binary), if((age > 40), null, length(name)) (type: int), if((length(name) > 10), null, (2.0D * gpa)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20]
+ selectExpressions: IfExprNullColumn(col 4:boolean, null, col 1)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprNullColumn(col 6:boolean, null, col 7)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprNullColumn(col 10:boolean, null, col 0)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprNullColumn(col 12:boolean, null, col 13)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprNullColumn(col 9:boolean, null, col 15)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprNullColumn(col 18:boolean, null, col 19)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_b_good
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: name:string, age:int, gpa:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double]
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_b_good
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table insert_b_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@insert_b_good
+POSTHOOK: query: insert overwrite table insert_b_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@insert_b_good
+POSTHOOK: Lineage: insert_b_good.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+name age gpa _c3 _c4 _c5 _c6 _c7 _c8
+PREHOOK: query: select * from insert_b_good
+PREHOOK: type: QUERY
+PREHOOK: Input: default@insert_b_good
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_b_good
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@insert_b_good
+#### A masked pattern was here ####
+insert_b_good.name insert_b_good.age insert_b_good.gpa insert_b_good.a insert_b_good.b insert_b_good.c insert_b_good.d insert_b_good.e insert_b_good.f
+George 22 3.8 NULL 2011-01-01 01:01:01 George NULL 6 7.6
+NULL NULL NULL NULL 2011-01-01 01:01:01 NULL NULL NULL NULL
+calvin brown 28 2.7 NULL 2011-01-01 01:01:01 NULL calvin brown 12 NULL
+luke brown 60 1.14 60 NULL NULL luke brown NULL 2.28
+luke king 28 0.47 NULL 2011-01-01 01:01:01 NULL luke king 9 0.94
+nick johnson 34 NULL NULL 2011-01-01 01:01:01 NULL nick johnson 12 NULL
+oscar thompson 35 2.98 NULL 2011-01-01 01:01:01 NULL oscar thompson 14 NULL
+priscilla falkner 55 1.16 55 NULL NULL priscilla falkner NULL NULL
+quinn ovid 19 NULL NULL 2011-01-01 01:01:01 NULL quinn ovid 10 NULL
+tom thompson 42 0.53 42 NULL NULL tom thompson NULL NULL
+ulysses garcia 35 2.74 NULL 2011-01-01 01:01:01 NULL ulysses garcia 14 NULL
+xavier garcia 33 1.06 NULL 2011-01-01 01:01:01 NULL xavier garcia 13 NULL
+PREHOOK: query: create table insert_b_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_b_better
+POSTHOOK: query: create table insert_b_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_b_better
+PREHOOK: query: explain vectorization detail
+insert overwrite table insert_b_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+insert overwrite table insert_b_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: student_10_lines
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), null, age) (type: int), if((age > 40), null, TIMESTAMP'2011-01-01 01:01:01.0') (type: timestamp), if((length(name) > 8), null, name) (type: string), if((length(name) < 8), null, CAST( name AS BINARY)) (type: binary), if((age > 40), null, length(name)) (type: int), if((length(name) > 10), null, (2.0D * gpa)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20]
+ selectExpressions: IfExprNullColumn(col 4:boolean, null, col 1)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprNullColumn(col 6:boolean, null, col 7)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprNullColumn(col 10:boolean, null, col 0)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprNullCondExpr(col 12:boolean, null, col 13:binary)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprNullCondExpr(col 9:boolean, null, col 15:int)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprNullCondExpr(col 18:boolea
n, null, col 19:double)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_b_better
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: name:string, age:int, gpa:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double]
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_b_better
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table insert_b_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@insert_b_better
+POSTHOOK: query: insert overwrite table insert_b_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@insert_b_better
+POSTHOOK: Lineage: insert_b_better.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+name age gpa _c3 _c4 _c5 _c6 _c7 _c8
+PREHOOK: query: select * from insert_b_better
+PREHOOK: type: QUERY
+PREHOOK: Input: default@insert_b_better
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_b_better
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@insert_b_better
+#### A masked pattern was here ####
+insert_b_better.name insert_b_better.age insert_b_better.gpa insert_b_better.a insert_b_better.b insert_b_better.c insert_b_better.d insert_b_better.e insert_b_better.f
+George 22 3.8 NULL 2011-01-01 01:01:01 George NULL 6 7.6
+NULL NULL NULL NULL 2011-01-01 01:01:01 NULL NULL NULL NULL
+calvin brown 28 2.7 NULL 2011-01-01 01:01:01 NULL calvin brown 12 NULL
+luke brown 60 1.14 60 NULL NULL luke brown NULL 2.28
+luke king 28 0.47 NULL 2011-01-01 01:01:01 NULL luke king 9 0.94
+nick johnson 34 NULL NULL 2011-01-01 01:01:01 NULL nick johnson 12 NULL
+oscar thompson 35 2.98 NULL 2011-01-01 01:01:01 NULL oscar thompson 14 NULL
+priscilla falkner 55 1.16 55 NULL NULL priscilla falkner NULL NULL
+quinn ovid 19 NULL NULL 2011-01-01 01:01:01 NULL quinn ovid 10 NULL
+tom thompson 42 0.53 42 NULL NULL tom thompson NULL NULL
+ulysses garcia 35 2.74 NULL 2011-01-01 01:01:01 NULL ulysses garcia 14 NULL
+xavier garcia 33 1.06 NULL 2011-01-01 01:01:01 NULL xavier garcia 13 NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out b/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
index 8d3f163..de30ca7 100644
--- a/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
@@ -51,13 +51,13 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 6]
- selectExpressions: IfExprLongScalarLongColumn(col 1:boolean, val 1, col 5:int)(children: IfExprColumnNull(col 3:boolean, col 4:int, null)(children: NotCol(col 1:boolean) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:int) -> 5:int) -> 6:int
+ projectedOutputColumnNums: [0, 7]
+ selectExpressions: IfExprColumnCondExpr(col 1:boolean, col 3:intcol 6:int)(children: col 1:boolean, ConstantVectorExpression(val 1) -> 3:int, IfExprColumnNull(col 4:boolean, col 5:int, null)(children: NotCol(col 1:boolean) -> 4:boolean, ConstantVectorExpression(val 0) -> 5:int) -> 6:int) -> 7:int
Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col1)
Group By Vectorization:
- aggregators: VectorUDAFCount(col 6:int) -> bigint
+ aggregators: VectorUDAFCount(col 7:int) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 0:string
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
index 9143816..19d9e39 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
@@ -68,8 +68,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [1, 16, 17]
- selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 16:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 17:string
+ projectedOutputColumnNums: [1, 17, 21]
+ selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string
Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -97,7 +97,7 @@ STAGE PLANS:
includeColumns: [1]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, bigint, string, string, string]
+ scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, string]
Stage: Stage-0
Fetch Operator
@@ -217,8 +217,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [1, 17, 20]
- selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprColumnNull(col 14:boolean, col 15:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean, ConstantVectorExpression(val b) -> 15:string) -> 16:string) -> 17:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 19:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprNullColumn(col 18:boolean, null, col 16)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 18:boolean, ConstantVectorExpression(val c) -> 16:string) -> 19:string) -> 20:string
+ projectedOutputColumnNums: [1, 18, 24]
+ selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string
Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -246,7 +246,7 @@ STAGE PLANS:
includeColumns: [1]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string]
+ scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, bigint, string, string, string]
Stage: Stage-0
Fetch Operator
@@ -594,7 +594,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [6]
- selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
+ selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:decimal(11,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -674,8 +674,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5]
- selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0)
+ projectedOutputColumnNums: [6]
+ selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:decimal(1,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:decimal(1,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -703,7 +703,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: member:decimal(10,0), attr:decimal(10,0)
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)]
+ scratchColumnTypeNames: [bigint, decimal(1,0), decimal(11,0), decimal(11,0)]
Stage: Stage-0
Fetch Operator
@@ -755,8 +755,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5]
- selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0)
+ projectedOutputColumnNums: [6]
+ selectExpressions: IfExprCondExprColumn(col 3:boolean, col 4:decimal(11,0), col 5:decimal(1,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), ConstantVectorExpression(val 2) -> 5:decimal(1,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -784,7 +784,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: member:decimal(10,0), attr:decimal(10,0)
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)]
+ scratchColumnTypeNames: [bigint, decimal(11,0), decimal(1,0), decimal(11,0)]
Stage: Stage-0
Fetch Operator
@@ -855,7 +855,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [6]
- selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint
+ selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:bigintcol 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -936,7 +936,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [5]
- selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint
+ selectExpressions: IfExprNullCondExpr(col 3:boolean, null, col 4:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -1017,7 +1017,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [5]
- selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint
+ selectExpressions: IfExprCondExprNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
[6/7] hive git commit: HIVE-18819: Vectorization: Optimize IF
statement expression evaluation of THEN/ELSE (Matt McCline,
reviewed by Teddy Choi)
Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out
new file mode 100644
index 0000000..bd590e2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out
@@ -0,0 +1,1204 @@
+PREHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY INT,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DECIMAL(10,2),
+ L_RETURNFLAG CHAR(1),
+ L_LINESTATUS CHAR(1),
+ l_shipdate DATE,
+ L_COMMITDATE DATE,
+ L_RECEIPTDATE DATE,
+ L_SHIPINSTRUCT VARCHAR(20),
+ L_SHIPMODE CHAR(10),
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lineitem_test_txt
+POSTHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY INT,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DECIMAL(10,2),
+ L_RETURNFLAG CHAR(1),
+ L_LINESTATUS CHAR(1),
+ l_shipdate DATE,
+ L_COMMITDATE DATE,
+ L_RECEIPTDATE DATE,
+ L_SHIPINSTRUCT VARCHAR(20),
+ L_SHIPMODE CHAR(10),
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem_test_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@lineitem_test_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@lineitem_test_txt
+PREHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@lineitem_test_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lineitem_test
+POSTHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@lineitem_test_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem_test
+POSTHOOK: Lineage: lineitem_test.l_comment SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_commitdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_commitdate, type:date, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_discount SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_discount, type:double, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_extendedprice SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_extendedprice, type:double, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_linenumber SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linenumber, type:int, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_linestatus SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linestatus, type:char(1), comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_orderkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_orderkey, type:int, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_partkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_partkey, type:int, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_quantity SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_quantity, type:int, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_receiptdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_receiptdate, type:date, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_returnflag SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_returnflag, type:char(1), comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_shipdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipdate, type:date, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_shipinstruct SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipinstruct, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_shipmode SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipmode, type:char(10), comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_suppkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_suppkey, type:int, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_tax SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_tax, type:decimal(10,2), comment:null), ]
+lineitem_test_txt.l_orderkey lineitem_test_txt.l_partkey lineitem_test_txt.l_suppkey lineitem_test_txt.l_linenumber lineitem_test_txt.l_quantity lineitem_test_txt.l_extendedprice lineitem_test_txt.l_discount lineitem_test_txt.l_tax lineitem_test_txt.l_returnflag lineitem_test_txt.l_linestatus lineitem_test_txt.l_shipdate lineitem_test_txt.l_commitdate lineitem_test_txt.l_receiptdate lineitem_test_txt.l_shipinstruct lineitem_test_txt.l_shipmode lineitem_test_txt.l_comment
+PREHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@lineitem_test
+POSTHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@lineitem_test
+POSTHOOK: Lineage: lineitem_test.l_comment EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_commitdate EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_discount EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_extendedprice EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_linenumber EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_linestatus EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_orderkey EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_partkey EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_quantity EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_receiptdate EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_returnflag EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_shipdate EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_shipinstruct EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_shipmode EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_suppkey EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_tax EXPRESSION []
+_col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: lineitem_test
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END (type: double), if((UDFToString(l_shipinstruct) = 'D
ELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date)
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: SELECT operator: Unexpected primitive type category VOID
+ vectorized: false
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 17
+ dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:date, VALUE._col4:double, VALUE._col5:double, VALUE._col6:decimal(10,2), VALUE._col7:decimal(10,2), VALUE._col8:decimal(12,2), VALUE._col9:decimal(12,2), VALUE._col10:decimal(10,2), VALUE._col11:decimal(10,2), VALUE._col12:timestamp, VALUE._col13:int, VALUE._col14:int, VALUE._col15:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem_test
+#### A masked pattern was here ####
+quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12
+NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31
+1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01
+1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01
+2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01
+2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01
+3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31
+3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01
+3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31
+4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01
+4 Some Some Some 1997-04-27 5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01
+5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01
+5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01
+5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01
+6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31
+6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01
+7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01
+8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01
+8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01
+9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01
+11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01
+12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01
+12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01
+13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01
+13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01
+13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01
+14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01
+15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31
+17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01
+17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01
+19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01
+19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01
+20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01
+21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01
+21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01
+22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01
+22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01
+23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01
+23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01
+23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31
+24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01
+24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31
+25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01
+25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01
+26 Many Many NULL 1996-11-09 39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01
+26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01
+26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01
+26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01
+27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01
+27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01
+28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01
+28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01
+28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01
+28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01
+28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01
+28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31
+29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01
+30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31
+30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31
+30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01
+31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01
+31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01
+32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01
+32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31
+32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31
+32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01
+32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01
+33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01
+34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01
+34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01
+34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01
+35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01
+36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01
+37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31
+37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01
+37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01
+38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01
+38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01
+39 Many Many NULL 1992-07-07 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01
+39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01
+40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01
+40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01
+41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01
+41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01
+41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01
+42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31
+42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01
+43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31
+43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01
+44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31
+44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01
+44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01
+44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01
+45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01
+45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01
+46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01
+46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01
+46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01
+46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01
+48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01
+49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31
+50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: lineitem_test
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2), 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END (type: double), if((UDFToString(l_shipinstruct) = 'D
ELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [4, 22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 38, 40, 43, 44]
+ selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 21:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 21:string) -> 22:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 24:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean
, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprColumnNull(col 20:boolean, col 21:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean, ConstantVectorExpression(val Many) -> 21:string) -> 23:string) -> 24:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprNullNull(null, null) -> 23:string) -> 25:string) -> 23:string) -> 25:string, IfExprLongColumnLongColumn(col 17:boolean, col 18:date, col 19:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 17:boolean, VectorUDFDateAddColSca
lar(col 10:date, val 10) -> 18:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 19:date) -> 26:date, IfExprDoubleColumnLongScalar(col 17:boolean, col 28:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 27:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 27:double) -> 28:double) -> 27:double, IfExprDoubleColumnDoubleScalar(col 17:boolean, col 29:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 28:double) -> 29:double) -> 28:double, IfExprNullColumn(col 17:boolean, null, col 7)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 17:boolean, col 7:decimal(10,2)) -> 30:decimal(10,2), IfExprColumnN
ull(col 18:boolean, col 7:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 18:boolean, col 7:decimal(10,2)) -> 31:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 32:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 33:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar
(20)) -> 23:string) -> 19:boolean) -> 34:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 35:decimal(10,2), IfExprTimestampColumnColumn(col 19:boolean, col 36:timestampcol 37:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 19:boolean, CastDateToTimestamp(col 12:date) -> 36:timestamp, CastDateToTimestamp(col 11:date) -> 37:timestamp) -> 38:timestamp, IfExprColumnNull(col 19:boolean, col 39:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 19:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 39:int) -> 40:int, IfExprNullColumn(col 41:boolean, null, col 42)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 41:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 42:int) -> 43:int, IfExprLongScalarLongScala
r(col 45:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 44:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 44:int) -> 45:boolean) -> 44:date
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [4]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 38, 40, 43, 44]
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 16
+ includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14]
+ dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2), l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, bigint, string, string, string, string, string, bigint, double, double, double, decimal(10,2), decimal(10,2), decimal(12,2), decimal(12,2), decimal(10,2), decimal(10,2), timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 17
+ dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:date, VALUE._col4:double, VALUE._col5:double, VALUE._col6:decimal(10,2), VALUE._col7:decimal(10,2), VALUE._col8:decimal(12,2), VALUE._col9:decimal(12,2), VALUE._col10:decimal(10,2), VALUE._col11:decimal(10,2), VALUE._col12:timestamp, VALUE._col13:int, VALUE._col14:int, VALUE._col15:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem_test
+#### A masked pattern was here ####
+quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12
+NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31
+1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01
+1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01
+2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01
+2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01
+3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31
+3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01
+3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31
+4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01
+4 Some Some Some 1997-04-27 5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01
+5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01
+5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01
+5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01
+6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31
+6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01
+7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01
+8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01
+8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01
+9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01
+11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01
+12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01
+12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01
+13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01
+13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01
+13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01
+14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01
+15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31
+17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01
+17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01
+19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01
+19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01
+20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01
+21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01
+21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01
+22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01
+22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01
+23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01
+23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01
+23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31
+24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01
+24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31
+25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01
+25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01
+26 Many Many NULL 1996-11-09 39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01
+26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01
+26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01
+26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01
+27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01
+27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01
+28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01
+28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01
+28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01
+28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01
+28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01
+28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31
+29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01
+30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31
+30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31
+30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01
+31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01
+31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01
+32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01
+32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31
+32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31
+32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01
+32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01
+33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01
+34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01
+34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01
+34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01
+35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01
+36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01
+37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31
+37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01
+37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01
+38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01
+38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01
+39 Many Many NULL 1992-07-07 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01
+39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01
+40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01
+40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01
+41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01
+41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01
+41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01
+42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31
+42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01
+43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31
+43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01
+44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31
+44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01
+44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01
+44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01
+45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01
+45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01
+46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01
+46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01
+46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01
+46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01
+48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01
+49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31
+50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: lineitem_test
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2), 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END (type: double), if((UDFToString(l_shipinstruct) = 'D
ELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [4, 27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 70, 73, 76, 79, 80]
+ selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 23:boolean, col 28:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 23:boolean, ConstantVectorExpression(val Single) -> 28:string, IfExprColumnCondExpr(col 29:boolea
n, col 30:stringcol 36:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 29:boolean, ConstantVectorExpression(val Two) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 35:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 31:boolean, ConstantVectorExpression(val Some) -> 32:string, IfExprColumnNull(col 33:boolean, col 34:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 33:boolean, ConstantVectorExpression(val Many) -> 34:string) -> 35:string) -> 36:string) -> 37:string) -> 38:string, IfExprColumnCondExpr(col 39:boolean, col 40:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 39:boolean, ConstantVectorExpression(val Single) -> 40:string, IfExprColumnCondExpr(col 41:boolean, col 42:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 41:boolean, ConstantVectorExpression(val Two) -> 42:string, IfExprColumnCondExpr(col 43:boolean, col 44:stringcol 45:string)(children: LongColLes
sLongScalar(col 4:int, val 10) -> 43:boolean, ConstantVectorExpression(val Some) -> 44:string, IfExprNullNull(null, null) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprDoubleColumnLongScalar(col 57:boolean, col 58:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 58:double) -> 54:double, IfExprCondExprColumn(col 57:boolean, col 59:double, col 58:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 58:double)(children: DoubleScalarSubtractD
oubleColumn(val 1.0, col 6:double) -> 58:double) -> 59:double, ConstantVectorExpression(val 0.0) -> 58:double) -> 60:double, IfExprNullColumn(col 62:boolean, null, col 7)(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 62:boolean, col 7:decimal(10,2)) -> 63:decimal(10,2), IfExprColumnNull(col 64:boolean, col 7:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 64:boolean, col 7:decimal(10,2)) -> 65:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 67:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(childr
en: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 68:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 69:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 70:decimal(10,2), IfExprCondExprCondExpr(col 66:boolean, col 71:timestampcol 72:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 66:boolean, CastDateToTimestamp(col 12:date) -> 71:timestamp, CastDateToTimestamp(col 11:date) -> 72:timestamp) -> 73:timestamp, IfExprCondExprNull(col 74:b
oolean, col 75:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 74:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 75:int) -> 76:int, IfExprNullCondExpr(col 77:boolean, null, col 78:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 77:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 78:int) -> 79:int, IfExprLongScalarLongScalar(col 81:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 80:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 80:int) -> 81:boolean) -> 80:date
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [4]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 70, 73, 76, 79, 80]
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 16
+ includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14]
+ dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2), l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, double, double, double, string, bigint, decimal(10,2), bigint, decimal(10,2), bigint, decimal(12,2), decimal(12,2), decimal(10,2), decimal(10,2), timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 17
+ dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:date, VALUE._col4:double, VALUE._col5:double, VALUE._col6:decimal(10,2), VALUE._col7:decimal(10,2), VALUE._col8:decimal(12,2), VALUE._col9:decimal(12,2), VALUE._col10:decimal(10,2), VALUE._col11:decimal(10,2), VALUE._col12:timestamp, VALUE._col13:int, VALUE._col14:int, VALUE._col15:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Descri
<TRUNCATED>
[7/7] hive git commit: HIVE-18819: Vectorization: Optimize IF
statement expression evaluation of THEN/ELSE (Matt McCline,
reviewed by Teddy Choi)
Posted by mm...@apache.org.
HIVE-18819: Vectorization: Optimize IF statement expression evaluation of THEN/ELSE (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/53980ba6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/53980ba6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/53980ba6
Branch: refs/heads/master
Commit: 53980ba66df815c64c8d644b36cb37b314ac2d7f
Parents: 1a3090f
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sat Mar 3 05:50:40 2018 -0600
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sat Mar 3 05:50:40 2018 -0600
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 7 +-
data/files/student_10_lines | 10 +
data/files/student_2_lines | 2 -
.../test/resources/testconfiguration.properties | 2 +
.../ql/exec/vector/VectorizationContext.java | 210 ++-
.../expressions/IfExprColumnCondExpr.java | 125 ++
.../vector/expressions/IfExprCondExprBase.java | 171 +++
.../expressions/IfExprCondExprColumn.java | 125 ++
.../expressions/IfExprCondExprCondExpr.java | 122 ++
.../vector/expressions/IfExprCondExprNull.java | 115 ++
.../vector/expressions/IfExprNullCondExpr.java | 117 ++
.../queries/clientpositive/vector_case_when_1.q | 281 ++++
.../queries/clientpositive/vector_case_when_2.q | 208 +++
.../clientpositive/vector_udf_adaptor_1.q | 210 ++-
.../llap/vector_case_when_1.q.out | 1204 ++++++++++++++++++
.../llap/vector_case_when_2.q.out | 884 +++++++++++++
.../llap/vector_groupby_grouping_id1.q.out | 8 +-
.../llap/vector_udf_adaptor_1.q.out | 1062 ++++++++++++++-
.../llap/vector_when_case_null.q.out | 6 +-
.../clientpositive/llap/vectorized_case.q.out | 32 +-
.../clientpositive/spark/vectorized_case.q.out | 32 +-
.../clientpositive/vector_case_when_1.q.out | 1126 ++++++++++++++++
.../clientpositive/vector_case_when_2.q.out | 806 ++++++++++++
.../clientpositive/vector_when_case_null.q.out | 6 +-
.../clientpositive/vectorized_case.q.out | 32 +-
25 files changed, 6730 insertions(+), 173 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 1e322b8..edea129 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2978,7 +2978,12 @@ public class HiveConf extends Configuration {
"Which vectorized input format support features are enabled for vectorization.\n" +
"That is, if a VectorizedInputFormat input format does support \"decimal_64\" for example\n" +
"this variable must enable that to be used in vectorization"),
-
+ HIVE_VECTORIZED_IF_EXPR_MODE("hive.vectorized.if.expr.mode", "better", new StringSet("adaptor", "good", "better"),
+ "Specifies the extent to which SQL IF statements will be vectorized.\n" +
+ "0. adaptor: only use the VectorUDFAdaptor to vectorize IF statements\n" +
+ "1. good : use regular vectorized IF expression classes that get good performance\n" +
+ "2. better : use vectorized IF expression classes that conditionally execute THEN/ELSE\n" +
+ " expressions for better performance.\n"),
HIVE_TEST_VECTORIZATION_ENABLED_OVERRIDE("hive.test.vectorized.execution.enabled.override",
"none", new StringSet("none", "enable", "disable"),
"internal use only, used to override the hive.vectorized.execution.enabled setting and\n" +
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/data/files/student_10_lines
----------------------------------------------------------------------
diff --git a/data/files/student_10_lines b/data/files/student_10_lines
new file mode 100644
index 0000000..2f1b331
--- /dev/null
+++ b/data/files/student_10_lines
@@ -0,0 +1,10 @@
+tom thompson420.53
+luke king280.47
+priscilla falkner551.16
+luke brown601.14
+ulysses garcia352.74
+calvin brown282.70
+oscar thompson352.98
+xavier garcia331.06
+nick johnson34
+quinn ovid19
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/data/files/student_2_lines
----------------------------------------------------------------------
diff --git a/data/files/student_2_lines b/data/files/student_2_lines
deleted file mode 100644
index 9e86836..0000000
--- a/data/files/student_2_lines
+++ /dev/null
@@ -1,2 +0,0 @@
-tom thompson420.53
-luke king280.47
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 544c836..9d80920 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -314,6 +314,8 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
vector_binary_join_groupby.q,\
vector_bround.q,\
vector_bucket.q,\
+ vector_case_when_1.q,\
+ vector_case_when_2.q,\
vector_cast_constant.q,\
vector_char_2.q,\
vector_char_4.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 4df6e97..93212ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -133,6 +133,21 @@ public class VectorizationContext {
}
private HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode;
+
+ public enum HiveVectorIfStmtMode {
+ ADAPTOR,
+ GOOD,
+ BETTER;
+
+ public static HiveVectorIfStmtMode getHiveConfValue(HiveConf hiveConf) {
+ String string = HiveConf.getVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZED_IF_EXPR_MODE);
+ return valueOf(string.toUpperCase());
+ }
+ }
+
+ private HiveVectorIfStmtMode hiveVectorIfStmtMode;
+
//when set to true use the overflow checked vector expressions
private boolean useCheckedVectorExpressions;
@@ -141,6 +156,7 @@ public class VectorizationContext {
private void setHiveConfVars(HiveConf hiveConf) {
hiveVectorAdaptorUsageMode = HiveVectorAdaptorUsageMode.getHiveConfValue(hiveConf);
+ hiveVectorIfStmtMode = HiveVectorIfStmtMode.getHiveConfValue(hiveConf);
this.reuseScratchColumns =
HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS);
this.ocm.setReuseColumns(reuseScratchColumns);
@@ -150,6 +166,7 @@ public class VectorizationContext {
private void copyHiveConfVars(VectorizationContext vContextEnvironment) {
hiveVectorAdaptorUsageMode = vContextEnvironment.hiveVectorAdaptorUsageMode;
+ hiveVectorIfStmtMode = vContextEnvironment.hiveVectorIfStmtMode;
this.reuseScratchColumns = vContextEnvironment.reuseScratchColumns;
this.ocm.setReuseColumns(reuseScratchColumns);
}
@@ -765,7 +782,7 @@ public class VectorizationContext {
// and that would require converting their data types to evaluate the udf.
// For example decimal column added to an integer column would require integer column to be
// cast to decimal.
- // Note: this is a no-op for custom UDFs
+ // Note: this is a no-op for custom UDFs
List<ExprNodeDesc> childExpressions = getChildExpressionsWithImplicitCast(expr.getGenericUDF(),
exprDesc.getChildren(), exprDesc.getTypeInfo());
ve = getGenericUdfVectorExpression(expr.getGenericUDF(),
@@ -1978,6 +1995,8 @@ public class VectorizationContext {
ve = getBetweenFilterExpression(childExpr, mode, returnType);
} else if (udf instanceof GenericUDFIn) {
ve = getInExpression(childExpr, mode, returnType);
+ } else if (udf instanceof GenericUDFIf) {
+ ve = getIfExpression((GenericUDFIf) udf, childExpr, mode, returnType);
} else if (udf instanceof GenericUDFWhen) {
ve = getWhenExpression(childExpr, mode, returnType);
} else if (udf instanceof GenericUDFOPPositive) {
@@ -2911,38 +2930,56 @@ public class VectorizationContext {
return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
}
+ private boolean isCondExpr(ExprNodeDesc exprNodeDesc) {
+ if (exprNodeDesc instanceof ExprNodeConstantDesc ||
+ exprNodeDesc instanceof ExprNodeColumnDesc) {
+ return false;
+ }
+ return true; // Requires conditional evaluation for good performance.
+ }
+
private boolean isNullConst(ExprNodeDesc exprNodeDesc) {
//null constant could be typed so we need to check the value
if (exprNodeDesc instanceof ExprNodeConstantDesc &&
((ExprNodeConstantDesc) exprNodeDesc).getValue() == null) {
- return true;
+ return true;
}
return false;
}
- private VectorExpression getWhenExpression(List<ExprNodeDesc> childExpr,
+ private VectorExpression getIfExpression(GenericUDFIf genericUDFIf, List<ExprNodeDesc> childExpr,
VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
if (mode != VectorExpressionDescriptor.Mode.PROJECTION) {
return null;
}
- final int size = childExpr.size();
- final ExprNodeDesc whenDesc = childExpr.get(0);
- final ExprNodeDesc thenDesc = childExpr.get(1);
- final ExprNodeDesc elseDesc;
+ // The HiveConf variable hive.vectorized.if.expr.mode selects one of 3 modes:
+ // 1) adaptor: Always use VectorUDFAdaptor for IF statements.
+ //
+ // 2) good: Vectorize but don't optimize conditional expressions
+ //
+ // 3) better: Vectorize and Optimize conditional expressions.
+ //
- if (size == 2) {
- elseDesc = new ExprNodeConstantDesc(returnType, null);
- } else if (size == 3) {
- elseDesc = childExpr.get(2);
- } else {
- final GenericUDFWhen udfWhen = new GenericUDFWhen();
- elseDesc = new ExprNodeGenericFuncDesc(returnType, udfWhen, udfWhen.getUdfName(),
- childExpr.subList(2, childExpr.size()));
+ if (hiveVectorIfStmtMode == HiveVectorIfStmtMode.ADAPTOR) {
+ return null;
}
- if (isNullConst(thenDesc) && isNullConst(elseDesc)) {
+ // Align the THEN/ELSE types.
+ childExpr =
+ getChildExpressionsWithImplicitCast(
+ genericUDFIf,
+ childExpr,
+ returnType);
+
+ final ExprNodeDesc ifDesc = childExpr.get(0);
+ final ExprNodeDesc thenDesc = childExpr.get(1);
+ final ExprNodeDesc elseDesc = childExpr.get(2);
+
+ final boolean isThenNullConst = isNullConst(thenDesc);
+ final boolean isElseNullConst = isNullConst(elseDesc);
+ if (isThenNullConst && isElseNullConst) {
// THEN NULL ELSE NULL: An unusual "case", but possible.
final int outputColumnNum = ocm.allocateOutputColumn(returnType);
@@ -2956,17 +2993,32 @@ public class VectorizationContext {
return resultExpr;
}
- if (isNullConst(thenDesc)) {
- final VectorExpression whenExpr = getVectorExpression(whenDesc, mode);
+
+ final boolean isThenCondExpr = isCondExpr(thenDesc);
+ final boolean isElseCondExpr = isCondExpr(elseDesc);
+
+ final boolean isOnlyGood = (hiveVectorIfStmtMode == HiveVectorIfStmtMode.GOOD);
+
+ if (isThenNullConst) {
+ final VectorExpression whenExpr = getVectorExpression(ifDesc, mode);
final VectorExpression elseExpr = getVectorExpression(elseDesc, mode);
final int outputColumnNum = ocm.allocateOutputColumn(returnType);
- final VectorExpression resultExpr =
- new IfExprNullColumn(
- whenExpr.getOutputColumnNum(),
- elseExpr.getOutputColumnNum(),
- outputColumnNum);
+ final VectorExpression resultExpr;
+ if (!isElseCondExpr || isOnlyGood) {
+ resultExpr =
+ new IfExprNullColumn(
+ whenExpr.getOutputColumnNum(),
+ elseExpr.getOutputColumnNum(),
+ outputColumnNum);
+ } else {
+ resultExpr =
+ new IfExprNullCondExpr(
+ whenExpr.getOutputColumnNum(),
+ elseExpr.getOutputColumnNum(),
+ outputColumnNum);
+ }
resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, elseExpr});
@@ -2984,17 +3036,27 @@ public class VectorizationContext {
return resultExpr;
}
- if (isNullConst(elseDesc)) {
- final VectorExpression whenExpr = getVectorExpression(whenDesc, mode);
+
+ if (isElseNullConst) {
+ final VectorExpression whenExpr = getVectorExpression(ifDesc, mode);
final VectorExpression thenExpr = getVectorExpression(thenDesc, mode);
final int outputColumnNum = ocm.allocateOutputColumn(returnType);
- final VectorExpression resultExpr =
- new IfExprColumnNull(
- whenExpr.getOutputColumnNum(),
- thenExpr.getOutputColumnNum(),
- outputColumnNum);
+ final VectorExpression resultExpr;
+ if (!isThenCondExpr || isOnlyGood) {
+ resultExpr =
+ new IfExprColumnNull(
+ whenExpr.getOutputColumnNum(),
+ thenExpr.getOutputColumnNum(),
+ outputColumnNum);
+ } else {
+ resultExpr =
+ new IfExprCondExprNull(
+ whenExpr.getOutputColumnNum(),
+ thenExpr.getOutputColumnNum(),
+ outputColumnNum);
+ }
resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, thenExpr});
@@ -3012,11 +3074,91 @@ public class VectorizationContext {
return resultExpr;
}
+
+ if ((isThenCondExpr || isElseCondExpr) && !isOnlyGood) {
+ final VectorExpression whenExpr = getVectorExpression(ifDesc, mode);
+ final VectorExpression thenExpr = getVectorExpression(thenDesc, mode);
+ final VectorExpression elseExpr = getVectorExpression(elseDesc, mode);
+
+ // Only proceed if the THEN/ELSE types were aligned.
+ if (thenExpr.getOutputColumnVectorType() == elseExpr.getOutputColumnVectorType()) {
+
+ final int outputColumnNum = ocm.allocateOutputColumn(returnType);
+
+ final VectorExpression resultExpr;
+ if (isThenCondExpr && isElseCondExpr) {
+ resultExpr =
+ new IfExprCondExprCondExpr(
+ whenExpr.getOutputColumnNum(),
+ thenExpr.getOutputColumnNum(),
+ elseExpr.getOutputColumnNum(),
+ outputColumnNum);
+ } else if (isThenCondExpr) {
+ resultExpr =
+ new IfExprCondExprColumn(
+ whenExpr.getOutputColumnNum(),
+ thenExpr.getOutputColumnNum(),
+ elseExpr.getOutputColumnNum(),
+ outputColumnNum);
+ } else {
+ resultExpr =
+ new IfExprColumnCondExpr(
+ whenExpr.getOutputColumnNum(),
+ thenExpr.getOutputColumnNum(),
+ elseExpr.getOutputColumnNum(),
+ outputColumnNum);
+ }
+
+ resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, thenExpr, elseExpr});
+
+ resultExpr.setInputTypeInfos(
+ whenExpr.getOutputTypeInfo(),
+ thenExpr.getOutputTypeInfo(),
+ elseExpr.getOutputTypeInfo());
+ resultExpr.setInputDataTypePhysicalVariations(
+ whenExpr.getOutputDataTypePhysicalVariation(),
+ thenExpr.getOutputDataTypePhysicalVariation(),
+ elseExpr.getOutputDataTypePhysicalVariation());
+
+ resultExpr.setOutputTypeInfo(returnType);
+ resultExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
+
+ return resultExpr;
+ }
+ }
+
+ Class<?> udfClass = genericUDFIf.getClass();
+ return getVectorExpressionForUdf(
+ genericUDFIf, udfClass, childExpr, mode, returnType);
+ }
+
+ private VectorExpression getWhenExpression(List<ExprNodeDesc> childExpr,
+ VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
+
+ if (mode != VectorExpressionDescriptor.Mode.PROJECTION) {
+ return null;
+ }
+ final int size = childExpr.size();
+
+ final ExprNodeDesc whenDesc = childExpr.get(0);
+ final ExprNodeDesc thenDesc = childExpr.get(1);
+ final ExprNodeDesc elseDesc;
+
+ if (size == 2) {
+ elseDesc = new ExprNodeConstantDesc(returnType, null);
+ } else if (size == 3) {
+ elseDesc = childExpr.get(2);
+ } else {
+ final GenericUDFWhen udfWhen = new GenericUDFWhen();
+ elseDesc = new ExprNodeGenericFuncDesc(returnType, udfWhen, udfWhen.getUdfName(),
+ childExpr.subList(2, childExpr.size()));
+ }
+
+ // Transform CASE WHEN with just a THEN/ELSE into an IF statement.
final GenericUDFIf genericUDFIf = new GenericUDFIf();
- final List<ExprNodeDesc> ifChildExpr = Arrays.<ExprNodeDesc>asList(whenDesc, thenDesc, elseDesc);
- final ExprNodeGenericFuncDesc exprNodeDesc =
- new ExprNodeGenericFuncDesc(returnType, genericUDFIf, "if", ifChildExpr);
- return getVectorExpression(exprNodeDesc, mode);
+ final List<ExprNodeDesc> ifChildExpr =
+ Arrays.<ExprNodeDesc>asList(whenDesc, thenDesc, elseDesc);
+ return getIfExpression(genericUDFIf, ifChildExpr, mode, returnType);
}
/*
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnCondExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnCondExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnCondExpr.java
new file mode 100644
index 0000000..94e5190
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnCondExpr.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Do regular execution of the THEN vector expression (a column or scalar) and conditional execution
+ * of the ELSE vector expression of a SQL IF statement.
+ */
+public class IfExprColumnCondExpr extends IfExprCondExprBase {
+ private static final long serialVersionUID = 1L;
+
+ protected final int arg2Column;
+ protected final int arg3Column;
+
+ public IfExprColumnCondExpr(int arg1Column, int arg2Column, int arg3Column,
+ int outputColumnNum) {
+ super(arg1Column, outputColumnNum);
+ this.arg2Column = arg2Column;
+ this.arg3Column = arg3Column;
+ }
+
+ public IfExprColumnCondExpr() {
+ super();
+
+ // Dummy final assignments.
+ arg2Column = -1;
+ arg3Column = -1;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ int n = batch.size;
+ if (n <= 0) {
+ // Nothing to do
+ return;
+ }
+
+ /*
+ * Do common analysis of the IF statement boolean expression.
+ *
+ * The following protected members can be examined afterwards:
+ *
+ * boolean isIfStatementResultRepeated
+ * boolean isIfStatementResultThen
+ *
+ * int thenSelectedCount
+ * int[] thenSelected
+ * int elseSelectedCount
+ * int[] elseSelected
+ */
+ super.evaluate(batch);
+
+ ColumnVector outputColVector = batch.cols[outputColumnNum];
+ boolean[] outputIsNull = outputColVector.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ // CONSIDER: Should we do this for all vector expressions that can
+ // work on BytesColumnVector output columns???
+ outputColVector.init();
+
+ ColumnVector thenColVector = batch.cols[arg2Column];
+ ColumnVector elseColVector = batch.cols[arg3Column];
+
+ final int thenCount = thenSelectedCount;
+ final int elseCount = elseSelectedCount;
+
+ if (isIfStatementResultRepeated) {
+ if (isIfStatementResultThen) {
+ // Evaluate THEN expression (only) and copy all its results.
+ childExpressions[1].evaluate(batch);
+ thenColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector);
+ } else {
+ // Evaluate ELSE expression (only) and copy all its results.
+ childExpressions[2].evaluate(batch);
+ elseColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector);
+ }
+ return;
+ }
+
+ // NOTE: We cannot use copySelected below since it is a whole column operation.
+
+ // The THEN expression is either IdentityExpression (a column) or a ConstantVectorExpression
+ // (a scalar) and trivial to evaluate.
+ childExpressions[1].evaluate(batch);
+ for (int i = 0; i < thenCount; i++) {
+ final int batchIndex = thenSelected[i];
+ outputIsNull[batchIndex] = false;
+ outputColVector.setElement(batchIndex, batchIndex, thenColVector);
+ }
+
+ conditionalEvaluate(batch, childExpressions[2], elseSelected, elseCount);
+ for (int i = 0; i < elseCount; i++) {
+ final int batchIndex = elseSelected[i];
+ outputIsNull[batchIndex] = false;
+ outputColVector.setElement(batchIndex, batchIndex, elseColVector);
+ }
+ }
+
+ /**
+ * Builds the parameter string displayed for this expression in the vectorized plan
+ * output: the IF condition column, the THEN column and the ELSE column, each
+ * separated by ", ".
+ */
+ @Override
+ public String vectorExpressionParameters() {
+ // Every parameter needs its own ", " separator; without one between the THEN and ELSE
+ // columns they run together in the plan text (e.g. "col 18:stringcol 26:string").
+ return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) +
+ ", " + getColumnParamString(2, arg3Column);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprBase.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprBase.java
new file mode 100644
index 0000000..abc1343
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprBase.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Base class that supports conditional execution of the THEN/ELSE vector expressions of
+ * a SQL IF statement.
+ */
+public abstract class IfExprCondExprBase extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+
+ protected final int arg1Column;
+
+ // Whether the IF statement boolean expression was repeating.
+ protected transient boolean isIfStatementResultRepeated;
+ protected transient boolean isIfStatementResultThen;
+
+ // The batchIndex for the rows that are for the THEN/ELSE rows respectively.
+ // Temporary work arrays.
+ protected transient int thenSelectedCount;
+ protected transient int[] thenSelected;
+ protected transient int elseSelectedCount;
+ protected transient int[] elseSelected;
+
+ public IfExprCondExprBase(int arg1Column, int outputColumnNum) {
+ super(outputColumnNum);
+ this.arg1Column = arg1Column;
+ }
+
+ public IfExprCondExprBase() {
+ super();
+
+ // Dummy final assignments.
+ arg1Column = -1;
+ }
+
+ public void conditionalEvaluate(VectorizedRowBatch batch, VectorExpression condVecExpr,
+ int[] condSelected, int condSize) {
+
+ int saveSize = batch.size;
+ boolean saveSelectedInUse = batch.selectedInUse;
+ int[] saveSelected = batch.selected;
+
+ batch.size = condSize;
+ batch.selectedInUse = true;
+ batch.selected = condSelected;
+
+ condVecExpr.evaluate(batch);
+
+ batch.size = saveSize;
+ batch.selectedInUse = saveSelectedInUse;
+ batch.selected = saveSelected;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ // NOTE: We do conditional vector expression so we do not call super.evaluateChildren(batch).
+
+ thenSelectedCount = 0;
+ elseSelectedCount = 0;
+ isIfStatementResultRepeated = false;
+ isIfStatementResultThen = false; // Give it a value.
+
+ int n = batch.size;
+ if (n <= 0) {
+ // Nothing to do
+ return;
+ }
+
+ // Child #1 is the IF boolean expression.
+ childExpressions[0].evaluate(batch);
+ LongColumnVector ifExprColVector = (LongColumnVector) batch.cols[arg1Column];
+ if (ifExprColVector.isRepeating) {
+ isIfStatementResultRepeated = true;
+ isIfStatementResultThen =
+ ((ifExprColVector.noNulls || !ifExprColVector.isNull[0]) &&
+ ifExprColVector.vector[0] == 1);
+ return;
+ }
+
+ if (thenSelected == null || n > thenSelected.length) {
+
+ // (Re)allocate larger to be a multiple of 1024 (DEFAULT_SIZE).
+ final int roundUpSize =
+ ((n + VectorizedRowBatch.DEFAULT_SIZE - 1) / VectorizedRowBatch.DEFAULT_SIZE)
+ * VectorizedRowBatch.DEFAULT_SIZE;
+ thenSelected = new int[roundUpSize];
+ elseSelected = new int[roundUpSize];
+ }
+
+ int[] sel = batch.selected;
+ long[] vector = ifExprColVector.vector;
+
+ if (ifExprColVector.noNulls) {
+ if (batch.selectedInUse) {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ if (vector[i] == 1) {
+ thenSelected[thenSelectedCount++] = i;
+ } else {
+ elseSelected[elseSelectedCount++] = i;
+ }
+ }
+ } else {
+ for (int i = 0; i < n; i++) {
+ if (vector[i] == 1) {
+ thenSelected[thenSelectedCount++] = i;
+ } else {
+ elseSelected[elseSelectedCount++] = i;
+ }
+ }
+ }
+ } else {
+ boolean[] isNull = ifExprColVector.isNull;
+ if (batch.selectedInUse) {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ if (!isNull[i] && vector[i] == 1) {
+ thenSelected[thenSelectedCount++] = i;
+ } else {
+ elseSelected[elseSelectedCount++] = i;
+ }
+ }
+ } else {
+ for (int i = 0; i < n; i++) {
+ if (!isNull[i] && vector[i] == 1) {
+ thenSelected[thenSelectedCount++] = i;
+ } else {
+ elseSelected[elseSelectedCount++] = i;
+ }
+ }
+ }
+ }
+
+ if (thenSelectedCount == 0) {
+ isIfStatementResultRepeated = true;
+ isIfStatementResultThen = false;
+ } else if (elseSelectedCount == 0) {
+ isIfStatementResultRepeated = true;
+ isIfStatementResultThen = true;
+ }
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+ // Descriptor is not defined because it takes variable number of arguments with different
+ // data types.
+ throw new UnsupportedOperationException("Undefined descriptor");
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprColumn.java
new file mode 100644
index 0000000..cc465c1
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprColumn.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * SQL IF statement whose THEN vector expression is executed conditionally (only for the rows
+ * that take the THEN branch) and whose ELSE vector expression (a column or scalar) is
+ * executed regularly.
+ */
+public class IfExprCondExprColumn extends IfExprCondExprBase {
+  private static final long serialVersionUID = 1L;
+
+  protected final int arg2Column;
+  protected final int arg3Column;
+
+  public IfExprCondExprColumn(int arg1Column, int arg2Column, int arg3Column,
+      int outputColumnNum) {
+    super(arg1Column, outputColumnNum);
+    this.arg2Column = arg2Column;
+    this.arg3Column = arg3Column;
+  }
+
+  public IfExprCondExprColumn() {
+    super();
+
+    // Dummy final assignments.
+    arg2Column = -1;
+    arg3Column = -1;
+  }
+
+  /**
+   * Copies the given rows of a source column into the same positions of the output column,
+   * clearing the output null flag of each row before the element is set.
+   */
+  private static void scatterRows(int[] rows, int rowCount, ColumnVector source,
+      ColumnVector output, boolean[] outputIsNull) {
+    for (int k = 0; k < rowCount; k++) {
+      final int row = rows[k];
+      outputIsNull[row] = false;
+      output.setElement(row, row, source);
+    }
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    final int batchSize = batch.size;
+    if (batchSize <= 0) {
+      // Empty batch.
+      return;
+    }
+
+    // The base class evaluates the IF boolean expression and fills in
+    // isIfStatementResultRepeated / isIfStatementResultThen and the
+    // thenSelected / elseSelected work arrays with their counts.
+    super.evaluate(batch);
+
+    final ColumnVector outputColVector = batch.cols[outputColumnNum];
+    final boolean[] outputIsNull = outputColVector.isNull;
+
+    // No column reset is done; the output is changed entry by entry below.
+    outputColVector.isRepeating = false;
+
+    // CONSIDER: Should we do this for all vector expressions that can
+    // work on BytesColumnVector output columns???
+    outputColVector.init();
+
+    final ColumnVector thenColVector = batch.cols[arg2Column];
+    final ColumnVector elseColVector = batch.cols[arg3Column];
+
+    final int thenRows = thenSelectedCount;
+    final int elseRows = elseSelectedCount;
+
+    if (isIfStatementResultRepeated) {
+      // All rows take one branch: evaluate only that child and copy all of its results.
+      final int branchChild = isIfStatementResultThen ? 1 : 2;
+      final ColumnVector branchColVector =
+          isIfStatementResultThen ? thenColVector : elseColVector;
+      childExpressions[branchChild].evaluate(batch);
+      branchColVector.copySelected(
+          batch.selectedInUse, batch.selected, batchSize, outputColVector);
+      return;
+    }
+
+    // NOTE: copySelected cannot be used from here on since it is a whole column operation.
+
+    // THEN is evaluated for the THEN rows only.
+    conditionalEvaluate(batch, childExpressions[1], thenSelected, thenRows);
+    scatterRows(thenSelected, thenRows, thenColVector, outputColVector, outputIsNull);
+
+    // The ELSE expression is either IdentityExpression (a column) or a
+    // ConstantVectorExpression (a scalar) and trivial to evaluate.
+    childExpressions[2].evaluate(batch);
+    scatterRows(elseSelected, elseRows, elseColVector, outputColVector, outputIsNull);
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return String.join(", ",
+        getColumnParamString(0, arg1Column),
+        getColumnParamString(1, arg2Column),
+        getColumnParamString(2, arg3Column));
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprCondExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprCondExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprCondExpr.java
new file mode 100644
index 0000000..7874d5c
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprCondExpr.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Do conditional execution of the THEN/ELSE vector expressions of a SQL IF statement.
+ *
+ * Both the THEN child and the ELSE child are evaluated only for the rows that take the
+ * respective branch.
+ */
+public class IfExprCondExprCondExpr extends IfExprCondExprBase {
+  private static final long serialVersionUID = 1L;
+
+  // Column numbers of the THEN and ELSE expression results.
+  protected final int arg2Column;
+  protected final int arg3Column;
+
+  public IfExprCondExprCondExpr(int arg1Column, int arg2Column, int arg3Column,
+      int outputColumnNum) {
+    super(arg1Column, outputColumnNum);
+    this.arg2Column = arg2Column;
+    this.arg3Column = arg3Column;
+  }
+
+  public IfExprCondExprCondExpr() {
+    super();
+
+    // Dummy final assignments.
+    arg2Column = -1;
+    arg3Column = -1;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    int n = batch.size;
+    if (n <= 0) {
+      // Nothing to do
+      return;
+    }
+
+    /*
+     * Do common analysis of the IF statement boolean expression.
+     *
+     * The following protected members can be examined afterwards:
+     *
+     *   boolean isIfStatementResultRepeated
+     *   boolean isIfStatementResultThen
+     *
+     *   int thenSelectedCount
+     *   int[] thenSelected
+     *   int elseSelectedCount
+     *   int[] elseSelected
+     */
+    super.evaluate(batch);
+
+    ColumnVector outputColVector = batch.cols[outputColumnNum];
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    // CONSIDER: Should we do this for all vector expressions that can
+    // work on BytesColumnVector output columns???
+    outputColVector.init();
+
+    ColumnVector thenColVector = batch.cols[arg2Column];
+    ColumnVector elseColVector = batch.cols[arg3Column];
+
+    final int thenCount = thenSelectedCount;
+    final int elseCount = elseSelectedCount;
+
+    if (isIfStatementResultRepeated) {
+      if (isIfStatementResultThen) {
+        // Evaluate THEN expression (only) and copy all its results.
+        childExpressions[1].evaluate(batch);
+        thenColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector);
+      } else {
+        // Evaluate ELSE expression (only) and copy all its results.
+        childExpressions[2].evaluate(batch);
+        elseColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector);
+      }
+      return;
+    }
+
+    // NOTE: We cannot use copySelected below since it is a whole column operation.
+
+    // Evaluate THEN for the THEN rows only and copy those rows to the output.
+    conditionalEvaluate(batch, childExpressions[1], thenSelected, thenCount);
+    for (int i = 0; i < thenCount; i++) {
+      final int batchIndex = thenSelected[i];
+      outputIsNull[batchIndex] = false;
+      outputColVector.setElement(batchIndex, batchIndex, thenColVector);
+    }
+
+    // Evaluate ELSE for the ELSE rows only and copy those rows to the output.
+    conditionalEvaluate(batch, childExpressions[2], elseSelected, elseCount);
+    for (int i = 0; i < elseCount; i++) {
+      final int batchIndex = elseSelected[i];
+      outputIsNull[batchIndex] = false;
+      outputColVector.setElement(batchIndex, batchIndex, elseColVector);
+    }
+  }
+
+  /**
+   * @return the column parameter strings of the condition, THEN and ELSE columns,
+   *         separated by ", "
+   */
+  @Override
+  public String vectorExpressionParameters() {
+    // The ", " between the THEN and ELSE parameters used to be missing, which ran the two
+    // descriptions together.
+    return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) +
+        ", " + getColumnParamString(2, arg3Column);
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprNull.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprNull.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprNull.java
new file mode 100644
index 0000000..b2bf0e4
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprNull.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * SQL IF statement with a conditionally executed THEN vector expression and a NULL ELSE:
+ * rows that take the ELSE branch produce NULL.
+ */
+public class IfExprCondExprNull extends IfExprCondExprBase {
+  private static final long serialVersionUID = 1L;
+
+  protected final int arg2Column;
+
+  public IfExprCondExprNull(int arg1Column, int arg2Column, int outputColumnNum) {
+    super(arg1Column, outputColumnNum);
+    this.arg2Column = arg2Column;
+  }
+
+  public IfExprCondExprNull() {
+    super();
+
+    // Dummy final assignments.
+    arg2Column = -1;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    final int batchSize = batch.size;
+    if (batchSize <= 0) {
+      // Empty batch.
+      return;
+    }
+
+    // The base class evaluates the IF boolean expression and fills in
+    // isIfStatementResultRepeated / isIfStatementResultThen and the
+    // thenSelected / elseSelected work arrays with their counts.
+    super.evaluate(batch);
+
+    final ColumnVector outputColVector = batch.cols[outputColumnNum];
+    final boolean[] outputIsNull = outputColVector.isNull;
+
+    // No column reset is done; the output is changed entry by entry below.
+    outputColVector.isRepeating = false;
+
+    // CONSIDER: Should we do this for all vector expressions that can
+    // work on BytesColumnVector output columns???
+    outputColVector.init();
+
+    final ColumnVector thenColVector = batch.cols[arg2Column];
+
+    final int thenRows = thenSelectedCount;
+    final int elseRows = elseSelectedCount;
+
+    if (isIfStatementResultRepeated) {
+      if (!isIfStatementResultThen) {
+        // Every row takes ELSE: the output is a repeating NULL.
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+        outputColVector.isRepeating = true;
+        return;
+      }
+      // Every row takes THEN: evaluate THEN (only) and copy all its results.
+      childExpressions[1].evaluate(batch);
+      thenColVector.copySelected(
+          batch.selectedInUse, batch.selected, batchSize, outputColVector);
+      return;
+    }
+
+    // NOTE: copySelected cannot be used from here on since it is a whole column operation.
+
+    // THEN is evaluated for the THEN rows only; copy those rows to the output.
+    conditionalEvaluate(batch, childExpressions[1], thenSelected, thenRows);
+    int k = 0;
+    while (k < thenRows) {
+      final int row = thenSelected[k++];
+      outputIsNull[row] = false;
+      outputColVector.setElement(row, row, thenColVector);
+    }
+
+    // ELSE rows are NULL.
+    outputColVector.noNulls = false;
+    final boolean[] nullFlags = outputColVector.isNull;
+    for (k = 0; k < elseRows; k++) {
+      nullFlags[elseSelected[k]] = true;
+    }
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return String.join(", ",
+        getColumnParamString(0, arg1Column),
+        getColumnParamString(1, arg2Column),
+        "null");
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullCondExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullCondExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullCondExpr.java
new file mode 100644
index 0000000..2ca3388
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullCondExpr.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * SQL IF statement with a NULL THEN and a conditionally executed ELSE vector expression:
+ * rows that take the THEN branch produce NULL.
+ */
+public class IfExprNullCondExpr extends IfExprCondExprBase {
+  private static final long serialVersionUID = 1L;
+
+  protected final int arg3Column;
+
+  public IfExprNullCondExpr(int arg1Column, int arg3Column, int outputColumnNum) {
+    super(arg1Column, outputColumnNum);
+    this.arg3Column = arg3Column;
+  }
+
+  public IfExprNullCondExpr() {
+    super();
+
+    // Dummy final assignments.
+    arg3Column = -1;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    final int batchSize = batch.size;
+    if (batchSize <= 0) {
+      // Empty batch.
+      return;
+    }
+
+    // The base class evaluates the IF boolean expression and fills in
+    // isIfStatementResultRepeated / isIfStatementResultThen and the
+    // thenSelected / elseSelected work arrays with their counts.
+    super.evaluate(batch);
+
+    final ColumnVector outputColVector = batch.cols[outputColumnNum];
+    final boolean[] outputIsNull = outputColVector.isNull;
+
+    // No column reset is done; the output is changed entry by entry below.
+    outputColVector.isRepeating = false;
+
+    // CONSIDER: Should we do this for all vector expressions that can
+    // work on BytesColumnVector output columns???
+    outputColVector.init();
+
+    final ColumnVector elseColVector = batch.cols[arg3Column];
+
+    final int thenRows = thenSelectedCount;
+    final int elseRows = elseSelectedCount;
+
+    // The ELSE expression is the second child expression even though it is the 3rd column.
+    final VectorExpression elseExpr = childExpressions[1];
+
+    if (isIfStatementResultRepeated) {
+      if (!isIfStatementResultThen) {
+        // Every row takes ELSE: evaluate ELSE (only) and copy all its results.
+        elseExpr.evaluate(batch);
+        elseColVector.copySelected(
+            batch.selectedInUse, batch.selected, batchSize, outputColVector);
+        return;
+      }
+      // Every row takes THEN: the output is a repeating NULL.
+      outputIsNull[0] = true;
+      outputColVector.noNulls = false;
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    // NOTE: copySelected cannot be used from here on since it is a whole column operation.
+
+    // THEN rows are NULL.
+    outputColVector.noNulls = false;
+    final boolean[] nullFlags = outputColVector.isNull;
+    int k;
+    for (k = 0; k < thenRows; k++) {
+      nullFlags[thenSelected[k]] = true;
+    }
+
+    // ELSE is evaluated for the ELSE rows only; copy those rows to the output.
+    conditionalEvaluate(batch, elseExpr, elseSelected, elseRows);
+    k = 0;
+    while (k < elseRows) {
+      final int row = elseSelected[k++];
+      outputIsNull[row] = false;
+      outputColVector.setElement(row, row, elseColVector);
+    }
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    // Second input parameter but 3rd column.
+    return String.join(", ",
+        getColumnParamString(0, arg1Column),
+        "null",
+        getColumnParamString(2, arg3Column));
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/queries/clientpositive/vector_case_when_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_case_when_1.q b/ql/src/test/queries/clientpositive/vector_case_when_1.q
new file mode 100644
index 0000000..f081efb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_case_when_1.q
@@ -0,0 +1,281 @@
+-- Session settings for this test; vectorized execution is switched on.
+set hive.cli.print.header=true;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.vectorized.execution.enabled=true;
+
+-- Text staging table for the '|'-delimited lineitem data file loaded below.
+CREATE TABLE lineitem_test_txt (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY INT,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DECIMAL(10,2),
+ L_RETURNFLAG CHAR(1),
+ L_LINESTATUS CHAR(1),
+ l_shipdate DATE,
+ L_COMMITDATE DATE,
+ L_RECEIPTDATE DATE,
+ L_SHIPINSTRUCT VARCHAR(20),
+ L_SHIPMODE CHAR(10),
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt;
+-- ORC copy of the staging table; this is the table the queries below read.
+CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt;
+-- Add one row in which every column is NULL.
+INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+
+-- Pass with hive.vectorized.if.expr.mode=adaptor: the CASE/IF query is first explained
+-- with EXPLAIN VECTORIZATION DETAIL and then executed. The query text is the same in the
+-- other passes of this file; only the mode setting differs.
+SET hive.vectorized.if.expr.mode=adaptor;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity;
+-- Execute the query that was explained above.
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity;
+
+-- Pass with hive.vectorized.if.expr.mode=good: the CASE/IF query is first explained
+-- with EXPLAIN VECTORIZATION DETAIL and then executed. The query text is the same in the
+-- other passes of this file; only the mode setting differs.
+SET hive.vectorized.if.expr.mode=good;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity;
+-- Execute the query that was explained above.
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity;
+
+-- Pass with hive.vectorized.if.expr.mode=better: explain the same CASE/IF query with
+-- EXPLAIN VECTORIZATION DETAIL under this mode setting.
+SET hive.vectorized.if.expr.mode=better;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity;
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity;
+
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/queries/clientpositive/vector_case_when_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_case_when_2.q b/ql/src/test/queries/clientpositive/vector_case_when_2.q
new file mode 100644
index 0000000..6854fc0
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_case_when_2.q
@@ -0,0 +1,208 @@
+set hive.cli.print.header=true;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.vectorized.execution.enabled=true;
+
+create table timestamps_txt (tsval timestamp) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/timestamps.txt' OVERWRITE INTO TABLE timestamps_txt;
+
+create table timestamps (cdate date, ctimestamp1 timestamp, stimestamp1 string, ctimestamp2 timestamp) stored as orc;
+insert overwrite table timestamps
+ select cast(tsval as date), tsval, cast(tsval as string), tsval - '1 2:3:4' day to second from timestamps_txt;
+
+INSERT INTO TABLE timestamps VALUES (NULL,NULL,NULL,NULL);
+
+SET hive.vectorized.if.expr.mode=adaptor;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2;
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2;
+
+SET hive.vectorized.if.expr.mode=good;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2;
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2;
+
+SET hive.vectorized.if.expr.mode=better;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2;
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2;
+
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q b/ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q
index 2eb0a0a..565edee 100644
--- a/ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q
+++ b/ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q
@@ -1,27 +1,209 @@
+set hive.cli.print.header=true;
SET hive.vectorized.execution.enabled=true;
set hive.fetch.task.conversion=none;
set hive.stats.column.autogather=false;
-create table student_2_lines(
+-- SORT_QUERY_RESULTS
+
+create table student_10_lines_txt(
name string,
age int,
gpa double)
row format delimited
fields terminated by '\001'
stored as textfile;
-LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines;
-analyze table student_2_lines compute statistics;
+LOAD DATA LOCAL INPATH '../../data/files/student_10_lines' OVERWRITE INTO TABLE student_10_lines_txt;
+CREATE TABLE student_10_lines STORED AS ORC AS SELECT * FROM student_10_lines_txt;
+INSERT INTO TABLE student_10_lines VALUES (NULL, NULL, NULL);
+INSERT INTO TABLE student_10_lines VALUES ("George", 22, 3.8);
+analyze table student_10_lines compute statistics;
+
+------------------------------------------------------------------------------------------
+
+SET hive.vectorized.if.expr.mode=adaptor;
+
+create table insert_a_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double);
+
+explain vectorization detail
+insert overwrite table insert_a_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines;
+insert overwrite table insert_a_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines;
+select * from insert_a_adaptor;
+
+SET hive.vectorized.if.expr.mode=good;
+
+create table insert_a_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double);
+
+explain vectorization detail
+insert overwrite table insert_a_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines;
+insert overwrite table insert_a_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines;
+select * from insert_a_good;
+
+SET hive.vectorized.if.expr.mode=better;
+
+create table insert_a_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double);
+
+explain vectorization detail
+insert overwrite table insert_a_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines;
+insert overwrite table insert_a_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines;
+select * from insert_a_better;
+
+------------------------------------------------------------------------------------------
+
+SET hive.vectorized.if.expr.mode=adaptor;
+
+create table insert_b_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double);
+
+explain vectorization detail
+insert overwrite table insert_b_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines;
+insert overwrite table insert_b_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines;
+select * from insert_b_adaptor;
+
+SET hive.vectorized.if.expr.mode=good;
+
+create table insert_b_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double);
+
+explain vectorization detail
+insert overwrite table insert_b_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines;
+insert overwrite table insert_b_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines;
+select * from insert_b_good;
+
+SET hive.vectorized.if.expr.mode=better;
-create table insert_10_1 (a float, b int, c timestamp, d binary);
+create table insert_b_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double);
explain vectorization detail
-insert overwrite table insert_10_1
- select cast(gpa as float),
- age,
- IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
- IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines;
-insert overwrite table insert_10_1
- select cast(gpa as float),
- age,
- IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
- IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines;
\ No newline at end of file
+insert overwrite table insert_b_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines;
+insert overwrite table insert_b_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines;
+select * from insert_b_better;
\ No newline at end of file
[3/7] hive git commit: HIVE-18819: Vectorization: Optimize IF
statement expression evaluation of THEN/ELSE (Matt McCline,
reviewed by Teddy Choi)
Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index 5380c9c..39d3bbe 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -68,8 +68,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [1, 16, 17]
- selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 16:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 17:string
+ projectedOutputColumnNums: [1, 17, 21]
+ selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -96,7 +96,7 @@ STAGE PLANS:
includeColumns: [1]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, bigint, string, string, string]
+ scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, string]
Stage: Stage-0
Fetch Operator
@@ -216,8 +216,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [1, 17, 20]
- selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprColumnNull(col 14:boolean, col 15:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean, ConstantVectorExpression(val b) -> 15:string) -> 16:string) -> 17:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 19:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprNullColumn(col 18:boolean, null, col 16)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 18:boolean, ConstantVectorExpression(val c) -> 16:string) -> 19:string) -> 20:string
+ projectedOutputColumnNums: [1, 18, 24]
+ selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -244,7 +244,7 @@ STAGE PLANS:
includeColumns: [1]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string]
+ scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, bigint, string, string, string]
Stage: Stage-0
Fetch Operator
@@ -588,7 +588,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [6]
- selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
+ selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:decimal(11,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -667,8 +667,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5]
- selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0)
+ projectedOutputColumnNums: [6]
+ selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:decimal(1,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:decimal(1,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -695,7 +695,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: member:decimal(10,0), attr:decimal(10,0)
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)]
+ scratchColumnTypeNames: [bigint, decimal(1,0), decimal(11,0), decimal(11,0)]
Stage: Stage-0
Fetch Operator
@@ -747,8 +747,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5]
- selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0)
+ projectedOutputColumnNums: [6]
+ selectExpressions: IfExprCondExprColumn(col 3:boolean, col 4:decimal(11,0), col 5:decimal(1,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), ConstantVectorExpression(val 2) -> 5:decimal(1,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -775,7 +775,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: member:decimal(10,0), attr:decimal(10,0)
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)]
+ scratchColumnTypeNames: [bigint, decimal(11,0), decimal(1,0), decimal(11,0)]
Stage: Stage-0
Fetch Operator
@@ -846,7 +846,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [6]
- selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint
+ selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:bigintcol 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -926,7 +926,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [5]
- selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint
+ selectExpressions: IfExprNullCondExpr(col 3:boolean, null, col 4:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -1006,7 +1006,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [5]
- selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint
+ selectExpressions: IfExprCondExprNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
[2/7] hive git commit: HIVE-18819: Vectorization: Optimize IF
statement expression evaluation of THEN/ELSE (Matt McCline,
reviewed by Teddy Choi)
Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/vector_case_when_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_case_when_1.q.out b/ql/src/test/results/clientpositive/vector_case_when_1.q.out
new file mode 100644
index 0000000..f80f477
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_case_when_1.q.out
@@ -0,0 +1,1126 @@
+PREHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY INT,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DECIMAL(10,2),
+ L_RETURNFLAG CHAR(1),
+ L_LINESTATUS CHAR(1),
+ l_shipdate DATE,
+ L_COMMITDATE DATE,
+ L_RECEIPTDATE DATE,
+ L_SHIPINSTRUCT VARCHAR(20),
+ L_SHIPMODE CHAR(10),
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lineitem_test_txt
+POSTHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY INT,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DECIMAL(10,2),
+ L_RETURNFLAG CHAR(1),
+ L_LINESTATUS CHAR(1),
+ l_shipdate DATE,
+ L_COMMITDATE DATE,
+ L_RECEIPTDATE DATE,
+ L_SHIPINSTRUCT VARCHAR(20),
+ L_SHIPMODE CHAR(10),
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem_test_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@lineitem_test_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@lineitem_test_txt
+PREHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@lineitem_test_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lineitem_test
+POSTHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@lineitem_test_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem_test
+POSTHOOK: Lineage: lineitem_test.l_comment SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_commitdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_commitdate, type:date, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_discount SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_discount, type:double, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_extendedprice SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_extendedprice, type:double, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_linenumber SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linenumber, type:int, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_linestatus SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linestatus, type:char(1), comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_orderkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_orderkey, type:int, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_partkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_partkey, type:int, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_quantity SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_quantity, type:int, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_receiptdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_receiptdate, type:date, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_returnflag SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_returnflag, type:char(1), comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_shipdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipdate, type:date, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_shipinstruct SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipinstruct, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_shipmode SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipmode, type:char(10), comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_suppkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_suppkey, type:int, comment:null), ]
+POSTHOOK: Lineage: lineitem_test.l_tax SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_tax, type:decimal(10,2), comment:null), ]
+lineitem_test_txt.l_orderkey lineitem_test_txt.l_partkey lineitem_test_txt.l_suppkey lineitem_test_txt.l_linenumber lineitem_test_txt.l_quantity lineitem_test_txt.l_extendedprice lineitem_test_txt.l_discount lineitem_test_txt.l_tax lineitem_test_txt.l_returnflag lineitem_test_txt.l_linestatus lineitem_test_txt.l_shipdate lineitem_test_txt.l_commitdate lineitem_test_txt.l_receiptdate lineitem_test_txt.l_shipinstruct lineitem_test_txt.l_shipmode lineitem_test_txt.l_comment
+PREHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@lineitem_test
+POSTHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@lineitem_test
+POSTHOOK: Lineage: lineitem_test.l_comment EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_commitdate EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_discount EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_extendedprice EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_linenumber EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_linestatus EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_orderkey EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_partkey EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_quantity EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_receiptdate EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_returnflag EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_shipdate EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_shipinstruct EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_shipmode EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_suppkey EXPRESSION []
+POSTHOOK: Lineage: lineitem_test.l_tax EXPRESSION []
+_col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: lineitem_test
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER
IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date)
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: SELECT operator: Unexpected primitive type category VOID
+ vectorized: false
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem_test
+#### A masked pattern was here ####
+quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12
+NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31
+1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01
+1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01
+2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01
+2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01
+3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31
+3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01
+3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31
+4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01
+4 Some Some Some 1997-04-27 5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01
+5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01
+5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01
+5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01
+6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31
+6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01
+7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01
+8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01
+8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01
+9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01
+11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01
+12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01
+12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01
+13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01
+13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01
+13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01
+14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01
+15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31
+17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01
+17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01
+19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01
+19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01
+20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01
+21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01
+21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01
+22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01
+22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01
+23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01
+23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31
+23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01
+24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31
+24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01
+25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01
+25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01
+26 Many Many NULL 1996-11-09 39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01
+26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01
+26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01
+26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01
+27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01
+27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01
+28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01
+28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01
+28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01
+28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31
+28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01
+28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01
+29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01
+30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01
+30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31
+30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31
+31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01
+31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01
+32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01
+32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31
+32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01
+32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31
+32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01
+33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01
+34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01
+34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01
+34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01
+35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01
+36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01
+37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01
+37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31
+37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01
+38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01
+38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01
+39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01
+39 Many Many NULL 1992-07-07 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01
+40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01
+40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01
+41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01
+41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01
+41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01
+42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31
+42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01
+43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31
+43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01
+44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31
+44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01
+44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01
+44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01
+45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01
+45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01
+46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01
+46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01
+46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01
+46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01
+48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01
+49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31
+50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: lineitem_test
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2), 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER
IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [4, 22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 38, 40, 43, 44]
+ selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 21:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 21:string) -> 22:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 24:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val
Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprColumnNull(col 20:boolean, col 21:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean, ConstantVectorExpression(val Many) -> 21:string) -> 23:string) -> 24:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprNullNull(null, null) -> 23:string) -> 25:string) -> 23:string) -> 25:string, IfExprLongColumnLongColumn(col 17:boolean, col 18:date, col 19:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 17:boolean, VectorUDFDateAddColScalar(co
l 10:date, val 10) -> 18:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 19:date) -> 26:date, IfExprDoubleColumnLongScalar(col 17:boolean, col 28:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 27:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 27:double) -> 28:double) -> 27:double, IfExprDoubleColumnDoubleScalar(col 17:boolean, col 29:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 28:double) -> 29:double) -> 28:double, IfExprNullColumn(col 17:boolean, null, col 7)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 17:boolean, col 7:decimal(10,2)) -> 30:decimal(10,2), IfExprColumnNull(co
l 18:boolean, col 7:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 18:boolean, col 7:decimal(10,2)) -> 31:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 32:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 33:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20))
-> 23:string) -> 19:boolean) -> 34:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 35:decimal(10,2), IfExprTimestampColumnColumn(col 19:boolean, col 36:timestampcol 37:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 19:boolean, CastDateToTimestamp(col 12:date) -> 36:timestamp, CastDateToTimestamp(col 11:date) -> 37:timestamp) -> 38:timestamp, IfExprColumnNull(col 19:boolean, col 39:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 19:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 39:int) -> 40:int, IfExprNullColumn(col 41:boolean, null, col 42)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 41:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 42:int) -> 43:int, IfExprLongScalarLongScalar(col
45:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 44:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 44:int) -> 45:boolean) -> 44:date
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 16
+ includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14]
+ dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2), l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, bigint, string, string, string, string, string, bigint, double, double, double, decimal(10,2), decimal(10,2), decimal(12,2), decimal(12,2), decimal(10,2), decimal(10,2), timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint]
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem_test
+#### A masked pattern was here ####
+quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12
+NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31
+1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01
+1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01
+2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01
+2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01
+3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31
+3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01
+3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31
+4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01
+4 Some Some Some 1997-04-27 5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01
+5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01
+5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01
+5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01
+6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31
+6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01
+7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01
+8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01
+8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01
+9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01
+11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01
+12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01
+12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01
+13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01
+13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01
+13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01
+14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01
+15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31
+17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01
+17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01
+19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01
+19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01
+20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01
+21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01
+21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01
+22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01
+22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01
+23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01
+23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31
+23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01
+24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31
+24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01
+25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01
+25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01
+26 Many Many NULL 1996-11-09 39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01
+26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01
+26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01
+26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01
+27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01
+27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01
+28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01
+28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01
+28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01
+28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31
+28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01
+28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01
+29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01
+30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01
+30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31
+30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31
+31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01
+31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01
+32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01
+32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31
+32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01
+32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31
+32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01
+33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01
+34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01
+34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01
+34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01
+35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01
+36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01
+37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01
+37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31
+37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01
+38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01
+38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01
+39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01
+39 Many Many NULL 1992-07-07 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01
+40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01
+40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01
+41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01
+41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01
+41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01
+42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31
+42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01
+43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31
+43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01
+44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31
+44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01
+44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01
+44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01
+45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01
+45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01
+46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01
+46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01
+46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01
+46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01
+48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01
+49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31
+50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: lineitem_test
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2), 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER
IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [4, 27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 70, 73, 76, 79, 80]
+ selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 23:boolean, col 28:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 23:boolean, ConstantVectorExpression(val Single) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col
30:stringcol 36:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 29:boolean, ConstantVectorExpression(val Two) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 35:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 31:boolean, ConstantVectorExpression(val Some) -> 32:string, IfExprColumnNull(col 33:boolean, col 34:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 33:boolean, ConstantVectorExpression(val Many) -> 34:string) -> 35:string) -> 36:string) -> 37:string) -> 38:string, IfExprColumnCondExpr(col 39:boolean, col 40:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 39:boolean, ConstantVectorExpression(val Single) -> 40:string, IfExprColumnCondExpr(col 41:boolean, col 42:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 41:boolean, ConstantVectorExpression(val Two) -> 42:string, IfExprColumnCondExpr(col 43:boolean, col 44:stringcol 45:string)(children: LongColLessLongS
calar(col 4:int, val 10) -> 43:boolean, ConstantVectorExpression(val Some) -> 44:string, IfExprNullNull(null, null) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprDoubleColumnLongScalar(col 57:boolean, col 58:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 58:double) -> 54:double, IfExprCondExprColumn(col 57:boolean, col 59:double, col 58:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 58:double)(children: DoubleScalarSubtractDoubleC
olumn(val 1.0, col 6:double) -> 58:double) -> 59:double, ConstantVectorExpression(val 0.0) -> 58:double) -> 60:double, IfExprNullColumn(col 62:boolean, null, col 7)(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 62:boolean, col 7:decimal(10,2)) -> 63:decimal(10,2), IfExprColumnNull(col 64:boolean, col 7:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 64:boolean, col 7:decimal(10,2)) -> 65:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 67:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: St
ringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 68:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 69:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 70:decimal(10,2), IfExprCondExprCondExpr(col 66:boolean, col 71:timestampcol 72:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 66:boolean, CastDateToTimestamp(col 12:date) -> 71:timestamp, CastDateToTimestamp(col 11:date) -> 72:timestamp) -> 73:timestamp, IfExprCondExprNull(col 74:boolean
, col 75:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 74:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 75:int) -> 76:int, IfExprNullCondExpr(col 77:boolean, null, col 78:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 77:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 78:int) -> 79:int, IfExprLongScalarLongScalar(col 81:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 80:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 80:int) -> 81:boolean) -> 80:date
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 16
+ includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14]
+ dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2), l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, double, double, double, string, bigint, decimal(10,2), bigint, decimal(10,2), bigint, decimal(12,2), decimal(12,2), decimal(10,2), decimal(10,2), timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint]
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ L_QUANTITY as Quantity,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE "Huge number" END AS Quantity_Description,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN "Many"
+ ELSE NULL END AS Quantity_Description_2,
+ CASE
+ WHEN L_QUANTITY = 1 THEN "Single"
+ WHEN L_QUANTITY = 2 THEN "Two"
+ WHEN L_QUANTITY < 10 THEN "Some"
+ WHEN L_QUANTITY < 100 THEN NULL
+ ELSE NULL END AS Quantity_Description_3,
+ IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date,
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting.
+ CASE WHEN L_RETURNFLAG = "N"
+ THEN l_extendedprice * (1 - l_discount)
+ ELSE CAST(0 AS DOUBLE) END AS Field_2,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4,
+ -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal.
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6,
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7,
+ IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8,
+ IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9,
+ IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10,
+ IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11,
+ IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12
+FROM lineitem_test
+ORDER BY Quantity
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem_test
+#### A masked pattern was here ####
+quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12
+NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31
+1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01
+1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01
+2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01
+2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01
+3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31
+3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01
+3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31
+4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01
+4 Some Some Some 1997-04-27 5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01
+5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01
+5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01
+5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01
+6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00
<TRUNCATED>
[5/7] hive git commit: HIVE-18819: Vectorization: Optimize IF
statement expression evaluation of THEN/ELSE (Matt McCline,
reviewed by Teddy Choi)
Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out
new file mode 100644
index 0000000..38187bb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out
@@ -0,0 +1,884 @@
+PREHOOK: query: create table timestamps_txt (tsval timestamp) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@timestamps_txt
+POSTHOOK: query: create table timestamps_txt (tsval timestamp) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@timestamps_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/timestamps.txt' OVERWRITE INTO TABLE timestamps_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@timestamps_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/timestamps.txt' OVERWRITE INTO TABLE timestamps_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@timestamps_txt
+PREHOOK: query: create table timestamps (cdate date, ctimestamp1 timestamp, stimestamp1 string, ctimestamp2 timestamp) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: create table timestamps (cdate date, ctimestamp1 timestamp, stimestamp1 string, ctimestamp2 timestamp) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@timestamps
+PREHOOK: query: insert overwrite table timestamps
+ select cast(tsval as date), tsval, cast(tsval as string), tsval - '1 2:3:4' day to second from timestamps_txt
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps_txt
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: insert overwrite table timestamps
+ select cast(tsval as date), tsval, cast(tsval as string), tsval - '1 2:3:4' day to second from timestamps_txt
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps_txt
+POSTHOOK: Output: default@timestamps
+POSTHOOK: Lineage: timestamps.cdate EXPRESSION [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: timestamps.ctimestamp1 SIMPLE [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: timestamps.ctimestamp2 EXPRESSION [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: timestamps.stimestamp1 EXPRESSION [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ]
+tsval tsval _c2 _c3
+PREHOOK: query: INSERT INTO TABLE timestamps VALUES (NULL,NULL,NULL,NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@timestamps
+POSTHOOK: query: INSERT INTO TABLE timestamps VALUES (NULL,NULL,NULL,NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@timestamps
+POSTHOOK: Lineage: timestamps.cdate EXPRESSION []
+POSTHOOK: Lineage: timestamps.ctimestamp1 EXPRESSION []
+POSTHOOK: Lineage: timestamps.ctimestamp2 EXPRESSION []
+POSTHOOK: Lineage: timestamps.stimestamp1 EXPRESSION []
+_col0 _col1 _col2 _col3
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: timestamps
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00.0')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00.0')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ct
imestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN (null) ELSE (null) END (type: string), if((TIMESTAMP'1974-10-04 17:21:03.989' > ctimestamp1), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
+ sort order: +++
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: SELECT operator: Unexpected primitive type category VOID
+ vectorized: false
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 11
+ dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:string, KEY.reducesinkkey2:timestamp, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:int, VALUE._col4:string, VALUE._col5:int, VALUE._col6:int, VALUE._col7:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+ctimestamp1 ctimestamp2 ctimestamp2_description ctimestamp2_description_2 ctimestamp2_description_3 field1 field_2 field_3 field_4 field_5
+NULL NULL Unknown NULL NULL NULL 2018-03-08 23:04:59 NULL NULL NULL
+0004-09-22 18:26:29.519542222 0004-09-21 16:23:25.519542222 1800s or Earlier Old Old 4 0004-09-22 18:26:29.519542222 26 NULL 0005-09-22
+0528-10-27 08:15:18.941718273 0528-10-26 06:12:14.941718273 1800s or Earlier Old Old 528 2018-03-08 23:04:59 15 NULL 0529-10-27
+1319-02-02 16:31:57.778 1319-02-01 14:28:53.778 1800s or Earlier Old Old 1319 1319-02-02 16:31:57.778 31 NULL 1320-02-02
+1404-07-23 15:32:16.059185026 1404-07-22 13:29:12.059185026 1800s or Earlier Old Old 1404 2018-03-08 23:04:59 32 NULL 1405-07-23
+1815-05-06 00:12:37.543584705 1815-05-04 22:09:33.543584705 1900s Old Old 1815 2018-03-08 23:04:59 12 NULL 1816-05-05
+1883-04-17 04:14:34.647766229 1883-04-16 02:11:30.647766229 1900s Old Old 1883 2018-03-08 23:04:59 14 NULL 1884-04-16
+1966-08-16 13:36:50.183618031 1966-08-15 11:33:46.183618031 Early 2010s Old Old 1966 1966-08-16 13:36:50.183618031 36 NULL 1967-08-16
+1973-04-17 06:30:38.596784156 1973-04-16 04:27:34.596784156 Early 2010s Old Old 1973 1973-04-17 06:30:38.596784156 30 NULL 1974-04-17
+1974-10-04 17:21:03.989 1974-10-03 15:17:59.989 Early 2010s Old Old 1974 1974-10-04 17:21:03.989 21 NULL 1974-10-05
+1976-03-03 04:54:33.000895162 1976-03-02 02:51:29.000895162 Early 2010s Old Old 1976 1976-03-03 04:54:33.000895162 54 NULL 1976-03-04
+1976-05-06 00:42:30.910786948 1976-05-04 22:39:26.910786948 Early 2010s Old Old 1976 1976-05-06 00:42:30.910786948 42 NULL 1977-05-06
+1978-08-05 14:41:05.501 1978-08-04 12:38:01.501 Early 2010s Old Old 1978 1978-08-05 14:41:05.501 41 NULL 1978-08-06
+1981-04-25 09:01:12.077192689 1981-04-24 06:58:08.077192689 Early 2010s Old Old 1981 1981-04-25 09:01:12.077192689 1 NULL 1982-04-25
+1981-11-15 23:03:10.999338387 1981-11-14 21:00:06.999338387 Early 2010s Old Old 1981 1981-11-15 23:03:10.999338387 3 NULL 1981-11-16
+1985-07-20 09:30:11 1985-07-19 07:27:07 Early 2010s Old Old 1985 1985-07-20 09:30:11 30 NULL 1986-07-20
+1985-11-18 16:37:54 1985-11-17 14:34:50 Early 2010s Old Old 1985 1985-11-18 16:37:54 37 NULL 1985-11-19
+1987-02-21 19:48:29 1987-02-20 17:45:25 Early 2010s Old Old 1987 1987-02-21 19:48:29 48 NULL 1987-02-22
+1987-05-28 13:52:07.900916635 1987-05-27 11:49:03.900916635 Early 2010s Old Old 1987 1987-05-28 13:52:07.900916635 52 NULL 1988-05-27
+1998-10-16 20:05:29.397591987 1998-10-15 18:02:25.397591987 Early 2010s Old Old 1998 1998-10-16 20:05:29.397591987 5 NULL 1999-10-16
+1999-10-03 16:59:10.396903939 1999-10-02 14:56:06.396903939 Early 2010s Old Old 1999 1999-10-03 16:59:10.396903939 59 NULL 1999-10-04
+2000-12-18 08:42:30.000595596 2000-12-17 06:39:26.000595596 Early 2010s Old Old 2000 2018-03-08 23:04:59 42 NULL 2000-12-19
+2002-05-10 05:29:48.990818073 2002-05-09 03:26:44.990818073 Early 2010s Early 2000s Early 2000s 2002 2018-03-08 23:04:59 29 NULL 2002-05-11
+2003-09-23 22:33:17.00003252 2003-09-22 20:30:13.00003252 Early 2010s Early 2000s Early 2000s 2003 2018-03-08 23:04:59 33 NULL 2004-09-22
+2004-03-07 20:14:13 2004-03-06 18:11:09 Early 2010s Early 2000s Early 2000s 2004 2018-03-08 23:04:59 14 NULL 2004-03-08
+2007-02-09 05:17:29.368756876 2007-02-08 03:14:25.368756876 Late 2000s Late 2000s Late 2000s 2007 2018-03-08 23:04:59 17 NULL 2008-02-09
+2009-01-21 10:49:07.108 2009-01-20 08:46:03.108 Late 2000s Late 2000s Late 2000s 2009 2018-03-08 23:04:59 49 NULL 2009-01-22
+2010-04-08 02:43:35.861742727 2010-04-07 00:40:31.861742727 Late 2000s Late 2000s Late 2000s 2010 2018-03-08 23:04:59 43 NULL 2010-04-09
+2013-04-07 02:44:43.00086821 2013-04-06 00:41:39.00086821 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 44 NULL 2013-04-08
+2013-04-10 00:43:46.854731546 2013-04-08 22:40:42.854731546 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 43 NULL 2013-04-11
+2021-09-24 03:18:32.413655165 2021-09-23 01:15:28.413655165 Unknown NULL NULL 2021 2018-03-08 23:04:59 NULL NULL 2021-09-25
+2024-11-11 16:42:41.101 2024-11-10 14:39:37.101 Unknown NULL NULL 2024 2018-03-08 23:04:59 42 NULL 2024-11-12
+4143-07-08 10:53:27.252802259 4143-07-07 08:50:23.252802259 Unknown NULL NULL 4143 2018-03-08 23:04:59 53 NULL 4143-07-09
+4966-12-04 09:30:55.202 4966-12-03 07:27:51.202 Unknown NULL NULL 4966 2018-03-08 23:04:59 30 NULL 4966-12-05
+5339-02-01 14:10:01.085678691 5339-01-31 12:06:57.085678691 Unknown NULL NULL 5339 2018-03-08 23:04:59 10 NULL 5340-02-01
+5344-10-04 18:40:08.165 5344-10-03 16:37:04.165 Unknown NULL NULL 5344 2018-03-08 23:04:59 40 NULL 5344-10-05
+5397-07-13 07:12:32.000896438 5397-07-12 05:09:28.000896438 Unknown NULL NULL 5397 2018-03-08 23:04:59 12 12 5397-07-14
+5966-07-09 03:30:50.597 5966-07-08 01:27:46.597 Unknown NULL NULL 5966 2018-03-08 23:04:59 30 30 5967-07-09
+6229-06-28 02:54:28.970117179 6229-06-27 00:51:24.970117179 Unknown NULL NULL 6229 2018-03-08 23:04:59 54 54 6230-06-28
+6482-04-27 12:07:38.073915413 6482-04-26 10:04:34.073915413 Unknown NULL NULL 6482 2018-03-08 23:04:59 7 7 6482-04-28
+6631-11-13 16:31:29.702202248 6631-11-12 14:28:25.702202248 Unknown NULL NULL 6631 2018-03-08 23:04:59 31 31 6631-11-14
+6705-09-28 18:27:28.000845672 6705-09-27 16:24:24.000845672 Unknown NULL NULL 6705 2018-03-08 23:04:59 27 NULL 6705-09-29
+6731-02-12 08:12:48.287783702 6731-02-11 06:09:44.287783702 Unknown NULL NULL 6731 2018-03-08 23:04:59 12 NULL 6731-02-13
+7160-12-02 06:00:24.81200852 7160-12-01 03:57:20.81200852 Unknown NULL NULL 7160 2018-03-08 23:04:59 0 NULL 7161-12-02
+7409-09-07 23:33:32.459349602 7409-09-06 21:30:28.459349602 Unknown NULL NULL 7409 2018-03-08 23:04:59 33 NULL 7409-09-08
+7503-06-23 23:14:17.486 7503-06-22 21:11:13.486 Unknown NULL NULL 7503 2018-03-08 23:04:59 14 NULL 7503-06-24
+8422-07-22 03:21:45.745036084 8422-07-21 01:18:41.745036084 Unknown NULL NULL 8422 2018-03-08 23:04:59 21 NULL 8422-07-23
+8521-01-16 20:42:05.668832388 8521-01-15 18:39:01.668832388 Unknown NULL NULL 8521 2018-03-08 23:04:59 42 NULL 8521-01-17
+9075-06-13 16:20:09.218517797 9075-06-12 14:17:05.218517797 Unknown NULL NULL 9075 2018-03-08 23:04:59 20 NULL 9075-06-14
+9209-11-11 04:08:58.223768453 9209-11-10 02:05:54.223768453 Unknown NULL NULL 9209 2018-03-08 23:04:59 8 NULL 9209-11-12
+9403-01-09 18:12:33.547 9403-01-08 16:09:29.547 Unknown NULL NULL 9403 2018-03-08 23:04:59 12 NULL 9403-01-10
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: timestamps
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:cdate:date, 1:ctimestamp1:timestamp, 2:stimestamp1:string, 3:ctimestamp2:timestamp, 4:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00.0')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00.0')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ct
imestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN (null) ELSE (null) END (type: string), if((TIMESTAMP'1974-10-04 17:21:03.989' > ctimestamp1), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 3, 10, 12, 13, 14, 11, 7, 16, 23, 2]
+ selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 9:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00.0) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00.0) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 9:string) -> 10:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:t
imestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 12:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprColumnNull(col 8:boolean, col 9:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean, ConstantVectorExpression(val Early 2010s) -> 9:string) -> 11:string) -> 12:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 13:st
ring)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprNullNull(null, null) -> 11:string) -> 13:string) -> 11:string) -> 13:string, IfExprLongColumnLongColumn(col 5:boolean, col 6:int, col 7:int)(children: TimestampScalarGreaterTimestampColumn(val 1974-10-04 17:21:03.989, col 1:timestamp) -> 5:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 7:int) -> 14:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 5:boolean) -> 11:string, IfExprNullColumn(col 5:boolean, null, col 6)(children: TimestampColEqualTimest
ampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 5:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 6:int) -> 7:int, IfExprColumnNull(col 17:boolean, col 15:int, null)(children: ColAndCol(col 15:boolean, col 16:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 16:boolean) -> 17:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 15:int) -> 16:int, IfExprLongColumnLongColumn(col 20:boolean, col 21:date, col 22:date)(children: DoubleColGreaterDoubleScalar(col 19:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 18:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 18:double) -> 19:double) -> 20:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 21:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 22:date) -> 23:date
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
+ sort order: +++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [1, 2, 3]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [10, 12, 13, 14, 11, 7, 16, 23]
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ includeColumns: [0, 1, 2, 3]
+ dataColumns: cdate:date, ctimestamp1:timestamp, stimestamp1:string, ctimestamp2:timestamp
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, double, double, bigint, bigint, bigint, bigint]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 11
+ dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:string, KEY.reducesinkkey2:timestamp, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:int, VALUE._col4:string, VALUE._col5:int, VALUE._col6:int, VALUE._col7:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+ctimestamp1 ctimestamp2 ctimestamp2_description ctimestamp2_description_2 ctimestamp2_description_3 field1 field_2 field_3 field_4 field_5
+NULL NULL Unknown NULL NULL NULL 2018-03-08 23:04:59 NULL NULL NULL
+0004-09-22 18:26:29.519542222 0004-09-21 16:23:25.519542222 1800s or Earlier Old Old 4 0004-09-22 18:26:29.519542222 26 NULL 0005-09-22
+0528-10-27 08:15:18.941718273 0528-10-26 06:12:14.941718273 1800s or Earlier Old Old 528 2018-03-08 23:04:59 15 NULL 0529-10-27
+1319-02-02 16:31:57.778 1319-02-01 14:28:53.778 1800s or Earlier Old Old 1319 1319-02-02 16:31:57.778 31 NULL 1320-02-02
+1404-07-23 15:32:16.059185026 1404-07-22 13:29:12.059185026 1800s or Earlier Old Old 1404 2018-03-08 23:04:59 32 NULL 1405-07-23
+1815-05-06 00:12:37.543584705 1815-05-04 22:09:33.543584705 1900s Old Old 1815 2018-03-08 23:04:59 12 NULL 1816-05-05
+1883-04-17 04:14:34.647766229 1883-04-16 02:11:30.647766229 1900s Old Old 1883 2018-03-08 23:04:59 14 NULL 1884-04-16
+1966-08-16 13:36:50.183618031 1966-08-15 11:33:46.183618031 Early 2010s Old Old 1966 1966-08-16 13:36:50.183618031 36 NULL 1967-08-16
+1973-04-17 06:30:38.596784156 1973-04-16 04:27:34.596784156 Early 2010s Old Old 1973 1973-04-17 06:30:38.596784156 30 NULL 1974-04-17
+1974-10-04 17:21:03.989 1974-10-03 15:17:59.989 Early 2010s Old Old 1974 1974-10-04 17:21:03.989 21 NULL 1974-10-05
+1976-03-03 04:54:33.000895162 1976-03-02 02:51:29.000895162 Early 2010s Old Old 1976 1976-03-03 04:54:33.000895162 54 NULL 1976-03-04
+1976-05-06 00:42:30.910786948 1976-05-04 22:39:26.910786948 Early 2010s Old Old 1976 1976-05-06 00:42:30.910786948 42 NULL 1977-05-06
+1978-08-05 14:41:05.501 1978-08-04 12:38:01.501 Early 2010s Old Old 1978 1978-08-05 14:41:05.501 41 NULL 1978-08-06
+1981-04-25 09:01:12.077192689 1981-04-24 06:58:08.077192689 Early 2010s Old Old 1981 1981-04-25 09:01:12.077192689 1 NULL 1982-04-25
+1981-11-15 23:03:10.999338387 1981-11-14 21:00:06.999338387 Early 2010s Old Old 1981 1981-11-15 23:03:10.999338387 3 NULL 1981-11-16
+1985-07-20 09:30:11 1985-07-19 07:27:07 Early 2010s Old Old 1985 1985-07-20 09:30:11 30 NULL 1986-07-20
+1985-11-18 16:37:54 1985-11-17 14:34:50 Early 2010s Old Old 1985 1985-11-18 16:37:54 37 NULL 1985-11-19
+1987-02-21 19:48:29 1987-02-20 17:45:25 Early 2010s Old Old 1987 1987-02-21 19:48:29 48 NULL 1987-02-22
+1987-05-28 13:52:07.900916635 1987-05-27 11:49:03.900916635 Early 2010s Old Old 1987 1987-05-28 13:52:07.900916635 52 NULL 1988-05-27
+1998-10-16 20:05:29.397591987 1998-10-15 18:02:25.397591987 Early 2010s Old Old 1998 1998-10-16 20:05:29.397591987 5 NULL 1999-10-16
+1999-10-03 16:59:10.396903939 1999-10-02 14:56:06.396903939 Early 2010s Old Old 1999 1999-10-03 16:59:10.396903939 59 NULL 1999-10-04
+2000-12-18 08:42:30.000595596 2000-12-17 06:39:26.000595596 Early 2010s Old Old 2000 2018-03-08 23:04:59 42 NULL 2000-12-19
+2002-05-10 05:29:48.990818073 2002-05-09 03:26:44.990818073 Early 2010s Early 2000s Early 2000s 2002 2018-03-08 23:04:59 29 NULL 2002-05-11
+2003-09-23 22:33:17.00003252 2003-09-22 20:30:13.00003252 Early 2010s Early 2000s Early 2000s 2003 2018-03-08 23:04:59 33 NULL 2004-09-22
+2004-03-07 20:14:13 2004-03-06 18:11:09 Early 2010s Early 2000s Early 2000s 2004 2018-03-08 23:04:59 14 NULL 2004-03-08
+2007-02-09 05:17:29.368756876 2007-02-08 03:14:25.368756876 Late 2000s Late 2000s Late 2000s 2007 2018-03-08 23:04:59 17 NULL 2008-02-09
+2009-01-21 10:49:07.108 2009-01-20 08:46:03.108 Late 2000s Late 2000s Late 2000s 2009 2018-03-08 23:04:59 49 NULL 2009-01-22
+2010-04-08 02:43:35.861742727 2010-04-07 00:40:31.861742727 Late 2000s Late 2000s Late 2000s 2010 2018-03-08 23:04:59 43 NULL 2010-04-09
+2013-04-07 02:44:43.00086821 2013-04-06 00:41:39.00086821 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 44 NULL 2013-04-08
+2013-04-10 00:43:46.854731546 2013-04-08 22:40:42.854731546 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 43 NULL 2013-04-11
+2021-09-24 03:18:32.413655165 2021-09-23 01:15:28.413655165 Unknown NULL NULL 2021 2018-03-08 23:04:59 NULL NULL 2021-09-25
+2024-11-11 16:42:41.101 2024-11-10 14:39:37.101 Unknown NULL NULL 2024 2018-03-08 23:04:59 42 NULL 2024-11-12
+4143-07-08 10:53:27.252802259 4143-07-07 08:50:23.252802259 Unknown NULL NULL 4143 2018-03-08 23:04:59 53 NULL 4143-07-09
+4966-12-04 09:30:55.202 4966-12-03 07:27:51.202 Unknown NULL NULL 4966 2018-03-08 23:04:59 30 NULL 4966-12-05
+5339-02-01 14:10:01.085678691 5339-01-31 12:06:57.085678691 Unknown NULL NULL 5339 2018-03-08 23:04:59 10 NULL 5340-02-01
+5344-10-04 18:40:08.165 5344-10-03 16:37:04.165 Unknown NULL NULL 5344 2018-03-08 23:04:59 40 NULL 5344-10-05
+5397-07-13 07:12:32.000896438 5397-07-12 05:09:28.000896438 Unknown NULL NULL 5397 2018-03-08 23:04:59 12 12 5397-07-14
+5966-07-09 03:30:50.597 5966-07-08 01:27:46.597 Unknown NULL NULL 5966 2018-03-08 23:04:59 30 30 5967-07-09
+6229-06-28 02:54:28.970117179 6229-06-27 00:51:24.970117179 Unknown NULL NULL 6229 2018-03-08 23:04:59 54 54 6230-06-28
+6482-04-27 12:07:38.073915413 6482-04-26 10:04:34.073915413 Unknown NULL NULL 6482 2018-03-08 23:04:59 7 7 6482-04-28
+6631-11-13 16:31:29.702202248 6631-11-12 14:28:25.702202248 Unknown NULL NULL 6631 2018-03-08 23:04:59 31 31 6631-11-14
+6705-09-28 18:27:28.000845672 6705-09-27 16:24:24.000845672 Unknown NULL NULL 6705 2018-03-08 23:04:59 27 NULL 6705-09-29
+6731-02-12 08:12:48.287783702 6731-02-11 06:09:44.287783702 Unknown NULL NULL 6731 2018-03-08 23:04:59 12 NULL 6731-02-13
+7160-12-02 06:00:24.81200852 7160-12-01 03:57:20.81200852 Unknown NULL NULL 7160 2018-03-08 23:04:59 0 NULL 7161-12-02
+7409-09-07 23:33:32.459349602 7409-09-06 21:30:28.459349602 Unknown NULL NULL 7409 2018-03-08 23:04:59 33 NULL 7409-09-08
+7503-06-23 23:14:17.486 7503-06-22 21:11:13.486 Unknown NULL NULL 7503 2018-03-08 23:04:59 14 NULL 7503-06-24
+8422-07-22 03:21:45.745036084 8422-07-21 01:18:41.745036084 Unknown NULL NULL 8422 2018-03-08 23:04:59 21 NULL 8422-07-23
+8521-01-16 20:42:05.668832388 8521-01-15 18:39:01.668832388 Unknown NULL NULL 8521 2018-03-08 23:04:59 42 NULL 8521-01-17
+9075-06-13 16:20:09.218517797 9075-06-12 14:17:05.218517797 Unknown NULL NULL 9075 2018-03-08 23:04:59 20 NULL 9075-06-14
+9209-11-11 04:08:58.223768453 9209-11-10 02:05:54.223768453 Unknown NULL NULL 9209 2018-03-08 23:04:59 8 NULL 9209-11-12
+9403-01-09 18:12:33.547 9403-01-08 16:09:29.547 Unknown NULL NULL 9403 2018-03-08 23:04:59 12 NULL 9403-01-10
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: timestamps
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:cdate:date, 1:ctimestamp1:timestamp, 2:stimestamp1:string, 3:ctimestamp2:timestamp, 4:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00.0')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00.0')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ct
imestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN (null) ELSE (null) END (type: string), if((TIMESTAMP'1974-10-04 17:21:03.989' > ctimestamp1), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 3, 15, 26, 36, 40, 42, 44, 46, 53, 2]
+ selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00.0) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00.0) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprCo
lumnCondExpr(col 11:boolean, col 16:stringcol 25:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 11:boolean, ConstantVectorExpression(val Old) -> 16:string, IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 24:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 17:boolean, ConstantVectorExpression(val Early 2000s) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 19:boolean, ConstantVectorExpression(val Late 2000s) -> 20:string, IfExprColumnNull(col 21:boolean, col 22:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 21:boolean, ConstantVectorExpression(val Early 2010s) -> 22:string) -> 23:string) -> 24:string) -> 25:string) -> 26:string, IfExprColumnCon
dExpr(col 27:boolean, col 28:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 27:boolean, ConstantVectorExpression(val Old) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 29:boolean, ConstantVectorExpression(val Early 2000s) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 33:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 31:boolean, ConstantVectorExpression(val Late 2000s) -> 32:string, IfExprNullNull(null, null) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampScalarGreaterTimestampColumn(val 1974-10-04 17:21:03.989, col 1:timestamp) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEA
R) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 41:boolean, null, col 43:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 41:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 43:int) -> 44:int, IfExprCondExprNull(col 47:boolean, col 45:int, null)(children: ColAndCol(col 45:boolean, col 46:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 45:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 46:boolean) -> 47:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 45:int) -> 46:int, IfExprCondExprCondExpr(col 50:boolean, col 51:datecol 52
:date)(children: DoubleColGreaterDoubleScalar(col 49:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 48:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 48:double) -> 49:double) -> 50:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 51:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 52:date) -> 53:date
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
+ sort order: +++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [1, 2, 3]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [15, 26, 36, 40, 42, 44, 46, 53]
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ includeColumns: [0, 1, 2, 3]
+ dataColumns: cdate:date, ctimestamp1:timestamp, stimestamp1:string, ctimestamp2:timestamp
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, bigint, bigint]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 11
+ dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:string, KEY.reducesinkkey2:timestamp, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:int, VALUE._col4:string, VALUE._col5:int, VALUE._col6:int, VALUE._col7:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ ctimestamp1,
+ ctimestamp2,
+ CASE
+ WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier"
+ WHEN ctimestamp2 < date '1900-01-01' THEN "1900s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE "Unknown" END AS ctimestamp2_Description,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s"
+ ELSE NULL END AS ctimestamp2_Description_2,
+ CASE
+ WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old"
+ WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s"
+ WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s"
+ WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL
+ ELSE NULL END AS ctimestamp2_Description_3,
+ IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1,
+ CASE WHEN stimestamp1 LIKE '%19%'
+ THEN stimestamp1
+ ELSE timestamp '2018-03-08 23:04:59' END AS Field_2,
+ IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3,
+ IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4,
+ IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5
+FROM timestamps
+ORDER BY ctimestamp1, stimestamp1, ctimestamp2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamps
+#### A masked pattern was here ####
+ctimestamp1 ctimestamp2 ctimestamp2_description ctimestamp2_description_2 ctimestamp2_description_3 field1 field_2 field_3 field_4 field_5
+NULL NULL Unknown NULL NULL NULL 2018-03-08 23:04:59 NULL NULL NULL
+0004-09-22 18:26:29.519542222 0004-09-21 16:23:25.519542222 1800s or Earlier Old Old 4 0004-09-22 18:26:29.519542222 26 NULL 0005-09-22
+0528-10-27 08:15:18.941718273 0528-10-26 06:12:14.941718273 1800s or Earlier Old Old 528 2018-03-08 23:04:59 15 NULL 0529-10-27
+1319-02-02 16:31:57.778 1319-02-01 14:28:53.778 1800s or Earlier Old Old 1319 1319-02-02 16:31:57.778 31 NULL 1320-02-02
+1404-07-23 15:32:16.059185026 1404-07-22 13:29:12.059185026 1800s or Earlier Old Old 1404 2018-03-08 23:04:59 32 NULL 1405-07-23
+1815-05-06 00:12:37.543584705 1815-05-04 22:09:33.543584705 1900s Old Old 1815 2018-03-08 23:04:59 12 NULL 1816-05-05
+1883-04-17 04:14:34.647766229 1883-04-16 02:11:30.647766229 1900s Old Old 1883 2018-03-08 23:04:59 14 NULL 1884-04-16
+1966-08-16 13:36:50.183618031 1966-08-15 11:33:46.183618031 Early 2010s Old Old 1966 1966-08-16 13:36:50.183618031 36 NULL 1967-08-16
+1973-04-17 06:30:38.596784156 1973-04-16 04:27:34.596784156 Early 2010s Old Old 1973 1973-04-17 06:30:38.596784156 30 NULL 1974-04-17
+1974-10-04 17:21:03.989 1974-10-03 15:17:59.989 Early 2010s Old Old 1974 1974-10-04 17:21:03.989 21 NULL 1974-10-05
+1976-03-03 04:54:33.000895162 1976-03-02 02:51:29.000895162 Early 2010s Old Old 1976 1976-03-03 04:54:33.000895162 54 NULL 1976-03-04
+1976-05-06 00:42:30.910786948 1976-05-04 22:39:26.910786948 Early 2010s Old Old 1976 1976-05-06 00:42:30.910786948 42 NULL 1977-05-06
+1978-08-05 14:41:05.501 1978-08-04 12:38:01.501 Early 2010s Old Old 1978 1978-08-05 14:41:05.501 41 NULL 1978-08-06
+1981-04-25 09:01:12.077192689 1981-04-24 06:58:08.077192689 Early 2010s Old Old 1981 1981-04-25 09:01:12.077192689 1 NULL 1982-04-25
+1981-11-15 23:03:10.999338387 1981-11-14 21:00:06.999338387 Early 2010s Old Old 1981 1981-11-15 23:03:10.999338387 3 NULL 1981-11-16
+1985-07-20 09:30:11 1985-07-19 07:27:07 Early 2010s Old Old 1985 1985-07-20 09:30:11 30 NULL 1986-07-20
+1985-11-18 16:37:54 1985-11-17 14:34:50 Early 2010s Old Old 1985 1985-11-18 16:37:54 37 NULL 1985-11-19
+1987-02-21 19:48:29 1987-02-20 17:45:25 Early 2010s Old Old 1987 1987-02-21 19:48:29 48 NULL 1987-02-22
+1987-05-28 13:52:07.900916635 1987-05-27 11:49:03.900916635 Early 2010s Old Old 1987 1987-05-28 13:52:07.900916635 52 NULL 1988-05-27
+1998-10-16 20:05:29.397591987 1998-10-15 18:02:25.397591987 Early 2010s Old Old 1998 1998-10-16 20:05:29.397591987 5 NULL 1999-10-16
+1999-10-03 16:59:10.396903939 1999-10-02 14:56:06.396903939 Early 2010s Old Old 1999 1999-10-03 16:59:10.396903939 59 NULL 1999-10-04
+2000-12-18 08:42:30.000595596 2000-12-17 06:39:26.000595596 Early 2010s Old Old 2000 2018-03-08 23:04:59 42 NULL 2000-12-19
+2002-05-10 05:29:48.990818073 2002-05-09 03:26:44.990818073 Early 2010s Early 2000s Early 2000s 2002 2018-03-08 23:04:59 29 NULL 2002-05-11
+2003-09-23 22:33:17.00003252 2003-09-22 20:30:13.00003252 Early 2010s Early 2000s Early 2000s 2003 2018-03-08 23:04:59 33 NULL 2004-09-22
+2004-03-07 20:14:13 2004-03-06 18:11:09 Early 2010s Early 2000s Early 2000s 2004 2018-03-08 23:04:59 14 NULL 2004-03-08
+2007-02-09 05:17:29.368756876 2007-02-08 03:14:25.368756876 Late 2000s Late 2000s Late 2000s 2007 2018-03-08 23:04:59 17 NULL 2008-02-09
+2009-01-21 10:49:07.108 2009-01-20 08:46:03.108 Late 2000s Late 2000s Late 2000s 2009 2018-03-08 23:04:59 49 NULL 2009-01-22
+2010-04-08 02:43:35.861742727 2010-04-07 00:40:31.861742727 Late 2000s Late 2000s Late 2000s 2010 2018-03-08 23:04:59 43 NULL 2010-04-09
+2013-04-07 02:44:43.00086821 2013-04-06 00:41:39.00086821 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 44 NULL 2013-04-08
+2013-04-10 00:43:46.854731546 2013-04-08 22:40:42.854731546 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 43 NULL 2013-04-11
+2021-09-24 03:18:32.413655165 2021-09-23 01:15:28.413655165 Unknown NULL NULL 2021 2018-03-08 23:04:59 NULL NULL 2021-09-25
+2024-11-11 16:42:41.101 2024-11-10 14:39:37.101 Unknown NULL NULL 2024 2018-03-08 23:04:59 42 NULL 2024-11-12
+4143-07-08 10:53:27.252802259 4143-07-07 08:50:23.252802259 Unknown NULL NULL 4143 2018-03-08 23:04:59 53 NULL 4143-07-09
+4966-12-04 09:30:55.202 4966-12-03 07:27:51.202 Unknown NULL NULL 4966 2018-03-08 23:04:59 30 NULL 4966-12-05
+5339-02-01 14:10:01.085678691 5339-01-31 12:06:57.085678691 Unknown NULL NULL 5339 2018-03-08 23:04:59 10 NULL 5340-02-01
+5344-10-04 18:40:08.165 5344-10-03 16:37:04.165 Unknown NULL NULL 5344 2018-03-08 23:04:59 40 NULL 5344-10-05
+5397-07-13 07:12:32.000896438 5397-07-12 05:09:28.000896438 Unknown NULL NULL 5397 2018-03-08 23:04:59 12 12 5397-07-14
+5966-07-09 03:30:50.597 5966-07-08 01:27:46.597 Unknown NULL NULL 5966 2018-03-08 23:04:59 30 30 5967-07-09
+6229-06-28 02:54:28.970117179 6229-06-27 00:51:24.970117179 Unknown NULL NULL 6229 2018-03-08 23:04:59 54 54 6230-06-28
+6482-04-27 12:07:38.073915413 6482-04-26 10:04:34.073915413 Unknown NULL NULL 6482 2018-03-08 23:04:59 7 7 6482-04-28
+6631-11-13 16:31:29.702202248 6631-11-12 14:28:25.702202248 Unknown NULL NULL 6631 2018-03-08 23:04:59 31 31 6631-11-14
+6705-09-28 18:27:28.000845672 6705-09-27 16:24:24.000845672 Unknown NULL NULL 6705 2018-03-08 23:04:59 27 NULL 6705-09-29
+6731-02-12 08:12:48.287783702 6731-02-11 06:09:44.287783702 Unknown NULL NULL 6731 2018-03-08 23:04:59 12 NULL 6731-02-13
+7160-12-02 06:00:24.81200852 7160-12-01 03:57:20.81200852 Unknown NULL NULL 7160 2018-03-08 23:04:59 0 NULL 7161-12-02
+7409-09-07 23:33:32.459349602 7409-09-06 21:30:28.459349602 Unknown NULL NULL 7409 2018-03-08 23:04:59 33 NULL 7409-09-08
+7503-06-23 23:14:17.486 7503-06-22 21:11:13.486 Unknown NULL NULL 7503 2018-03-08 23:04:59 14 NULL 7503-06-24
+8422-07-22 03:21:45.745036084 8422-07-21 01:18:41.745036084 Unknown NULL NULL 8422 2018-03-08 23:04:59 21 NULL 8422-07-23
+8521-01-16 20:42:05.668832388 8521-01-15 18:39:01.668832388 Unknown NULL NULL 8521 2018-03-08 23:04:59 42 NULL 8521-01-17
+9075-06-13 16:20:09.218517797 9075-06-12 14:17:05.218517797 Unknown NULL NULL 9075 2018-03-08 23:04:59 20 NULL 9075-06-14
+9209-11-11 04:08:58.223768453 9209-11-10 02:05:54.223768453 Unknown NULL NULL 9209 2018-03-08 23:04:59 8 NULL 9209-11-12
+9403-01-09 18:12:33.547 9403-01-08 16:09:29.547 Unknown NULL NULL 9403 2018-03-08 23:04:59 12 NULL 9403-01-10
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out
index e57a0da..9f9fdaf 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out
@@ -764,8 +764,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 1, 2, 8]
- selectExpressions: IfExprStringScalarStringGroupColumn(col 3:boolean, val 0col 7:string)(children: LongColEqualLongScalar(col 2:bigint, val 0) -> 3:boolean, IfExprStringScalarStringGroupColumn(col 4:boolean, val 1col 8:string)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 4:boolean, IfExprStringScalarStringGroupColumn(col 5:boolean, val 2col 7:string)(children: LongColEqualLongScalar(col 2:bigint, val 2) -> 5:boolean, IfExprStringScalarStringScalar(col 6:boolean, val 3, val nothing)(children: LongColEqualLongScalar(col 2:bigint, val 3) -> 6:boolean) -> 7:string) -> 8:string) -> 7:string) -> 8:string
+ projectedOutputColumnNums: [0, 1, 2, 13]
+ selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:stringcol 12:string)(children: LongColEqualLongScalar(col 2:bigint, val 0) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:string, IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 11:string)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 5:boolean, ConstantVectorExpression(val 1) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 10:string)(children: LongColEqualLongScalar(col 2:bigint, val 2) -> 7:boolean, ConstantVectorExpression(val 2) -> 8:string, IfExprStringScalarStringScalar(col 9:boolean, val 3, val nothing)(children: LongColEqualLongScalar(col 2:bigint, val 3) -> 9:boolean) -> 10:string) -> 11:string) -> 12:string) -> 13:string
Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -924,8 +924,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 1, 2, 8]
- selectExpressions: IfExprStringScalarStringGroupColumn(col 3:boolean, val 0col 7:string)(children: LongColEqualLongScalar(col 2:bigint, val 0) -> 3:boolean, IfExprStringScalarStringGroupColumn(col 4:boolean, val 1col 8:string)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 4:boolean, IfExprStringScalarStringGroupColumn(col 5:boolean, val 2col 7:string)(children: LongColEqualLongScalar(col 2:bigint, val 2) -> 5:boolean, IfExprStringScalarStringScalar(col 6:boolean, val 3, val nothing)(children: LongColEqualLongScalar(col 2:bigint, val 3) -> 6:boolean) -> 7:string) -> 8:string) -> 7:string) -> 8:string
+ projectedOutputColumnNums: [0, 1, 2, 13]
+ selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:stringcol 12:string)(children: LongColEqualLongScalar(col 2:bigint, val 0) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:string, IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 11:string)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 5:boolean, ConstantVectorExpression(val 1) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 10:string)(children: LongColEqualLongScalar(col 2:bigint, val 2) -> 7:boolean, ConstantVectorExpression(val 2) -> 8:string, IfExprStringScalarStringScalar(col 9:boolean, val 3, val nothing)(children: LongColEqualLongScalar(col 2:bigint, val 3) -> 9:boolean) -> 10:string) -> 11:string) -> 12:string) -> 13:string
Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false