You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/02/03 20:03:50 UTC

[25/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
index 207e9bb..6da29e0 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
@@ -38,20 +38,24 @@ POSTHOOK: Lineage: vector_interval_1.dt EXPRESSION []
 POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION []
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   str1,
   interval '1-2' year to month, interval_year_month(str1),
   interval '1 2:3:4' day to second, interval_day_time(str2)
 from vector_interval_1 order by str1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   str1,
   interval '1-2' year to month, interval_year_month(str1),
   interval '1 2:3:4' day to second, interval_day_time(str2)
 from vector_interval_1 order by str1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -69,26 +73,62 @@ STAGE PLANS:
                 TableScan
                   alias: vector_interval_1
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: str1 (type: string), CAST( str1 AS INTERVAL YEAR TO MONTH) (type: interval_year_month), CAST( str2 AS INTERVAL DAY TO SECOND) (type: interval_day_time)
                     outputColumnNames: _col0, _col2, _col4
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2, 4, 5]
+                        selectExpressions: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalDayTime(col 3) -> 5:interval_day_time
                     Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col2 (type: interval_year_month), _col4 (type: interval_day_time)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), 1-2 (type: interval_year_month), VALUE._col0 (type: interval_year_month), 1 02:03:04.000000000 (type: interval_day_time), VALUE._col1 (type: interval_day_time)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 3, 1, 4, 2]
+                    selectExpressions: ConstantVectorExpression(val 14) -> 3:long, ConstantVectorExpression(val 1 02:03:04.000000000) -> 4:interval_day_time
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -119,7 +159,7 @@ POSTHOOK: Input: default@vector_interval_1
 #### A masked pattern was here ####
 NULL	1-2	NULL	1 02:03:04.000000000	NULL
 1-2	1-2	1-2	1 02:03:04.000000000	1 02:03:04.000000000
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   dt,
   interval '1-2' year to month + interval '1-2' year to month,
@@ -130,7 +170,7 @@ select
   interval '1-2' year to month - interval_year_month(str1)
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   dt,
   interval '1-2' year to month + interval '1-2' year to month,
@@ -141,6 +181,10 @@ select
   interval '1-2' year to month - interval_year_month(str1)
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -158,26 +202,62 @@ STAGE PLANS:
                 TableScan
                   alias: vector_interval_1
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: dt (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (CAST( str1 AS INTERVAL YEAR TO MONTH) - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month)
                     outputColumnNames: _col0, _col2, _col3, _col5, _col6
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 6, 5, 8, 7]
+                        selectExpressions: IntervalYearMonthColAddIntervalYearMonthColumn(col 4, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long, IntervalYearMonthScalarAddIntervalYearMonthColumn(val 14, col 4)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 5:long, IntervalYearMonthColSubtractIntervalYearMonthColumn(col 4, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, IntervalYearMonthScalarSubtractIntervalYearMonthColumn(val 14, col 4)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 7:long
                     Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: date)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col2 (type: interval_year_month), _col3 (type: interval_year_month), _col5 (type: interval_year_month), _col6 (type: interval_year_month)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: date), 2-4 (type: interval_year_month), VALUE._col0 (type: interval_year_month), VALUE._col1 (type: interval_year_month), 0-0 (type: interval_year_month), VALUE._col2 (type: interval_year_month), VALUE._col3 (type: interval_year_month)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 5, 1, 2, 6, 3, 4]
+                    selectExpressions: ConstantVectorExpression(val 28) -> 5:long, ConstantVectorExpression(val 0) -> 6:long
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -216,7 +296,7 @@ POSTHOOK: Input: default@vector_interval_1
 #### A masked pattern was here ####
 NULL	2-4	NULL	NULL	0-0	NULL	NULL
 2001-01-01	2-4	2-4	2-4	0-0	0-0	0-0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   dt,
   interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -227,7 +307,7 @@ select
   interval '1 2:3:4' day to second - interval_day_time(str2)
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   dt,
   interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -238,6 +318,10 @@ select
   interval '1 2:3:4' day to second - interval_day_time(str2)
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -255,26 +339,62 @@ STAGE PLANS:
                 TableScan
                   alias: vector_interval_1
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: dt (type: date), (CAST( str2 AS INTERVAL DAY TO SECOND) + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time)
                     outputColumnNames: _col0, _col2, _col3, _col5, _col6
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 6, 5, 8, 7]
+                        selectExpressions: IntervalDayTimeColAddIntervalDayTimeColumn(col 4, col 5)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time, CastStringToIntervalDayTime(col 3) -> 5:interval_day_time) -> 6:interval_day_time, IntervalDayTimeScalarAddIntervalDayTimeColumn(val 1 02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time) -> 5:timestamp, IntervalDayTimeColSubtractIntervalDayTimeColumn(col 4, col 7)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time, CastStringToIntervalDayTime(col 3) -> 7:interval_day_time) -> 8:interval_day_time, IntervalDayTimeScalarSubtractIntervalDayTimeColumn(val 1 02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time) -> 7:timestamp
                     Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: date)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: date), 2 04:06:08.000000000 (type: interval_day_time), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), 0 00:00:00.000000000 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 5, 1, 2, 6, 3, 4]
+                    selectExpressions: ConstantVectorExpression(val 2 04:06:08.000000000) -> 5:interval_day_time, ConstantVectorExpression(val 0 00:00:00.000000000) -> 6:interval_day_time
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -313,7 +433,7 @@ POSTHOOK: Input: default@vector_interval_1
 #### A masked pattern was here ####
 NULL	2 04:06:08.000000000	NULL	NULL	0 00:00:00.000000000	NULL	NULL
 2001-01-01	2 04:06:08.000000000	2 04:06:08.000000000	2 04:06:08.000000000	0 00:00:00.000000000	0 00:00:00.000000000	0 00:00:00.000000000
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   dt,
   dt + interval '1-2' year to month,
@@ -330,7 +450,7 @@ select
   dt - interval_day_time(str2)
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   dt,
   dt + interval '1-2' year to month,
@@ -347,6 +467,10 @@ select
   dt - interval_day_time(str2)
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -364,26 +488,61 @@ STAGE PLANS:
                 TableScan
                   alias: vector_interval_1
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: dt (type: date), (dt + 1-2) (type: date), (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (1-2 + dt) (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + dt) (type: date), (dt - 1-2) (type: date), (dt - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (dt + 1 02:03:04.000000000) (type: timestamp), (dt + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + dt) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + dt) (type: timestamp), (dt - 1 02:03:04.000000000) (type: timestamp), (dt - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 4, 6, 5, 8, 7, 10, 11, 13, 14, 15, 16, 17]
+                        selectExpressions: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 4:long, DateColAddIntervalYearMonthColumn(col 1, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long, IntervalYearMonthScalarAddDateColumn(val 1-2, col 1) -> 5:long, IntervalYearMonthColAddDateColumn(col 7, col 1)(children: CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, DateColSubtractIntervalYearMonthScalar(col 1, val 1-2) -> 7:long, DateColSubtractIntervalYearMonthColumn(col 1, col 9)(children: CastStringToIntervalYearMonth(col 2) -> 9:interval_year_month) -> 10:long, DateColAddIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) -> 11:timestamp, DateColAddIntervalDayTimeColumn(col 1, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp, IntervalDayTimeScalarAddDateColumn(val 1 02:03:04.000000000, col 1) -> 14:timestamp, IntervalDayTimeColAddDateColumn(col 12, col 1)(children: Cas
 tStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:interval_day_time, DateColSubtractIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) -> 16:timestamp, DateColSubtractIntervalDayTimeColumn(col 1, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 17:timestamp
                     Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: date)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -434,7 +593,7 @@ POSTHOOK: Input: default@vector_interval_1
 #### A masked pattern was here ####
 NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
 2001-01-01	2002-03-01	2002-03-01	2002-03-01	2002-03-01	1999-11-01	1999-11-01	2001-01-02 02:03:04	2001-01-02 02:03:04	2001-01-02 02:03:04	2001-01-02 02:03:04	2000-12-30 21:56:56	2000-12-30 21:56:56
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   ts,
   ts + interval '1-2' year to month,
@@ -451,7 +610,7 @@ select
   ts - interval_day_time(str2)
 from vector_interval_1 order by ts
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   ts,
   ts + interval '1-2' year to month,
@@ -468,6 +627,10 @@ select
   ts - interval_day_time(str2)
 from vector_interval_1 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -485,26 +648,61 @@ STAGE PLANS:
                 TableScan
                   alias: vector_interval_1
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: ts (type: timestamp), (ts + 1-2) (type: timestamp), (ts + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (1-2 + ts) (type: timestamp), (CAST( str1 AS INTERVAL YEAR TO MONTH) + ts) (type: timestamp), (ts - 1-2) (type: timestamp), (ts - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (ts + 1 02:03:04.000000000) (type: timestamp), (ts + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + ts) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + ts) (type: timestamp), (ts - 1 02:03:04.000000000) (type: timestamp), (ts - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]
+                        selectExpressions: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 4:timestamp, TimestampColAddIntervalYearMonthColumn(col 0, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:timestamp, IntervalYearMonthScalarAddTimestampColumn(val 1-2, col 0) -> 7:timestamp, IntervalYearMonthColAddTimestampColumn(col 5, col 0)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 8:timestamp, TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-2) -> 9:timestamp, TimestampColSubtractIntervalYearMonthColumn(col 0, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 10:timestamp, TimestampColAddIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) -> 11:timestamp, TimestampColAddIntervalDayTimeColumn(col 0, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp, IntervalDayTimeScalarAddTimestampColumn(val 1 02:03:04.000000000, col 0) ->
  14:timestamp, IntervalDayTimeColAddTimestampColumn(col 12, col 0)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:timestamp, TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) -> 16:timestamp, TimestampColSubtractIntervalDayTimeColumn(col 0, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 17:timestamp
                     Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: timestamp)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -555,7 +753,7 @@ POSTHOOK: Input: default@vector_interval_1
 #### A masked pattern was here ####
 NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
 2001-01-01 01:02:03	2002-03-01 01:02:03	2002-03-01 01:02:03	2002-03-01 01:02:03	2002-03-01 01:02:03	1999-11-01 01:02:03	1999-11-01 01:02:03	2001-01-02 03:05:07	2001-01-02 03:05:07	2001-01-02 03:05:07	2001-01-02 03:05:07	2000-12-30 22:58:59	2000-12-30 22:58:59
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   ts,
   ts - ts,
@@ -563,7 +761,7 @@ select
   ts - timestamp '2001-01-01 01:02:03'
 from vector_interval_1 order by ts
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   ts,
   ts - ts,
@@ -571,6 +769,10 @@ select
   ts - timestamp '2001-01-01 01:02:03'
 from vector_interval_1 order by ts
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -588,26 +790,61 @@ STAGE PLANS:
                 TableScan
                   alias: vector_interval_1
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: ts (type: timestamp), (ts - ts) (type: interval_day_time), (2001-01-01 01:02:03.0 - ts) (type: interval_day_time), (ts - 2001-01-01 01:02:03.0) (type: interval_day_time)
                     outputColumnNames: _col0, _col1, _col2, _col3
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 4, 5, 6]
+                        selectExpressions: TimestampColSubtractTimestampColumn(col 0, col 0) -> 4:interval_day_time, TimestampScalarSubtractTimestampColumn(val 2001-01-01 01:02:03.0, col 0) -> 5:timestamp, TimestampColSubtractTimestampScalar(col 0, val 2001-01-01 01:02:03.0) -> 6:interval_day_time
                     Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: timestamp)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
                 outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3]
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -640,7 +877,7 @@ POSTHOOK: Input: default@vector_interval_1
 #### A masked pattern was here ####
 NULL	NULL	NULL	NULL
 2001-01-01 01:02:03	0 00:00:00.000000000	0 00:00:00.000000000	0 00:00:00.000000000
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   dt,
   dt - dt,
@@ -648,7 +885,7 @@ select
   dt - date '2001-01-01'
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   dt,
   dt - dt,
@@ -656,6 +893,10 @@ select
   dt - date '2001-01-01'
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -673,26 +914,61 @@ STAGE PLANS:
                 TableScan
                   alias: vector_interval_1
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: dt (type: date), (dt - dt) (type: interval_day_time), (2001-01-01 - dt) (type: interval_day_time), (dt - 2001-01-01) (type: interval_day_time)
                     outputColumnNames: _col0, _col1, _col2, _col3
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 4, 5, 6]
+                        selectExpressions: DateColSubtractDateColumn(col 1, col 1) -> 4:timestamp, DateScalarSubtractDateColumn(val 2001-01-01 00:00:00.0, col 1) -> 5:timestamp, DateColSubtractDateScalar(col 1, val 2001-01-01 00:00:00.0) -> 6:timestamp
                     Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: date)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
                 outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3]
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -725,7 +1001,7 @@ POSTHOOK: Input: default@vector_interval_1
 #### A masked pattern was here ####
 NULL	NULL	NULL	NULL
 2001-01-01	0 00:00:00.000000000	0 00:00:00.000000000	0 00:00:00.000000000
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
   dt,
   ts - dt,
@@ -736,7 +1012,7 @@ select
   date '2001-01-01' - ts
 from vector_interval_1 order by dt
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
   dt,
   ts - dt,
@@ -747,6 +1023,10 @@ select
   date '2001-01-01' - ts
 from vector_interval_1 order by dt
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -764,26 +1044,61 @@ STAGE PLANS:
                 TableScan
                   alias: vector_interval_1
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
                   Select Operator
                     expressions: dt (type: date), (ts - dt) (type: interval_day_time), (2001-01-01 01:02:03.0 - dt) (type: interval_day_time), (ts - 2001-01-01) (type: interval_day_time), (dt - ts) (type: interval_day_time), (dt - 2001-01-01 01:02:03.0) (type: interval_day_time), (2001-01-01 - ts) (type: interval_day_time)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 4, 5, 6, 7, 8, 9]
+                        selectExpressions: TimestampColSubtractDateColumn(col 0, col 1) -> 4:interval_day_time, TimestampScalarSubtractDateColumn(val 2001-01-01 01:02:03.0, col 1) -> 5:interval_day_time, TimestampColSubtractDateScalar(col 0, val 2001-01-01 00:00:00.0) -> 6:interval_day_time, DateColSubtractTimestampColumn(col 1, col 0) -> 7:interval_day_time, DateColSubtractTimestampScalar(col 1, val 2001-01-01 01:02:03.0) -> 8:interval_day_time, DateScalarSubtractTimestampColumn(val 2001-01-01 00:00:00.0, col 0) -> 9:interval_day_time
                     Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: date)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6]
                 Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat