You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2017/11/30 03:18:50 UTC
[14/29] hive git commit: Revert "HIVE-17528 : Add more q-tests for
Hive-on-Spark with Parquet vectorized reader (Ferdinand Xu,
reviewed by Vihang Karajgaonkar)"
http://git-wip-us.apache.org/repos/asf/hive/blob/a5d5473f/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
deleted file mode 100644
index b3e2f17..0000000
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
+++ /dev/null
@@ -1,31602 +0,0 @@
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT MIN(ctinyint) as c1,
- MAX(ctinyint),
- COUNT(ctinyint),
- COUNT(*)
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT MIN(ctinyint) as c1,
- MAX(ctinyint),
- COUNT(ctinyint),
- COUNT(*)
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
- Reducer 3 <- Reducer 2 (SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
- Select Operator
- expressions: ctinyint (type: tinyint)
- outputColumnNames: ctinyint
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count()
- Group By Vectorization:
- aggregators: VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3]
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3]
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 12
- includeColumns: [0]
- dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reducer 2
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 4
- dataColumns: VALUE._col0:tinyint, VALUE._col1:tinyint, VALUE._col2:bigint, VALUE._col3:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
- Group By Vectorization:
- aggregators: VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 1:tinyint) -> tinyint, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFCountMerge(col 3:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3]
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: tinyint)
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3]
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
- Reducer 3
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 4
- dataColumns: KEY.reducesinkkey0:tinyint, VALUE._col0:tinyint, VALUE._col1:bigint, VALUE._col2:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT MIN(ctinyint) as c1,
- MAX(ctinyint),
- COUNT(ctinyint),
- COUNT(*)
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT MIN(ctinyint) as c1,
- MAX(ctinyint),
- COUNT(ctinyint),
- COUNT(*)
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
--64 62 9173 12288
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT SUM(ctinyint) as c1
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT SUM(ctinyint) as c1
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
- Reducer 3 <- Reducer 2 (SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
- Select Operator
- expressions: ctinyint (type: tinyint)
- outputColumnNames: ctinyint
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(ctinyint)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0]
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 12
- includeColumns: [0]
- dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reducer 2
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 1
- dataColumns: VALUE._col0:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0]
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reducer 3
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 1
- dataColumns: KEY.reducesinkkey0:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: bigint)
- outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT SUM(ctinyint) as c1
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT SUM(ctinyint) as c1
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
--39856
-PREHOOK: query: EXPLAIN VECTORIZATION
-SELECT
- avg(ctinyint) as c1,
- variance(ctinyint),
- var_pop(ctinyint),
- var_samp(ctinyint),
- std(ctinyint),
- stddev(ctinyint),
- stddev_pop(ctinyint),
- stddev_samp(ctinyint)
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION
-SELECT
- avg(ctinyint) as c1,
- variance(ctinyint),
- var_pop(ctinyint),
- var_samp(ctinyint),
- std(ctinyint),
- stddev(ctinyint),
- stddev_pop(ctinyint),
- stddev_samp(ctinyint)
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
- Reducer 3 <- Reducer 2 (SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ctinyint (type: tinyint)
- outputColumnNames: ctinyint
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<count:bigint,sum:double,input:tinyint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reducer 2
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reduce Operator Tree:
- Group By Operator
- aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- sort order: +
- Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
- Reducer 3
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT
- avg(ctinyint) as c1,
- variance(ctinyint),
- var_pop(ctinyint),
- var_samp(ctinyint),
- std(ctinyint),
- stddev(ctinyint),
- stddev_pop(ctinyint),
- stddev_samp(ctinyint)
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT
- avg(ctinyint) as c1,
- variance(ctinyint),
- var_pop(ctinyint),
- var_samp(ctinyint),
- std(ctinyint),
- stddev(ctinyint),
- stddev_pop(ctinyint),
- stddev_samp(ctinyint)
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
--4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT MIN(cbigint) as c1,
- MAX(cbigint),
- COUNT(cbigint),
- COUNT(*)
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT MIN(cbigint) as c1,
- MAX(cbigint),
- COUNT(cbigint),
- COUNT(*)
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
- Reducer 3 <- Reducer 2 (SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
- Select Operator
- expressions: cbigint (type: bigint)
- outputColumnNames: cbigint
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [3]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(cbigint), max(cbigint), count(cbigint), count()
- Group By Vectorization:
- aggregators: VectorUDAFMinLong(col 3:bigint) -> bigint, VectorUDAFMaxLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3]
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3]
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 12
- includeColumns: [3]
- dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reducer 2
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 4
- dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:bigint, VALUE._col3:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
- Group By Vectorization:
- aggregators: VectorUDAFMinLong(col 0:bigint) -> bigint, VectorUDAFMaxLong(col 1:bigint) -> bigint, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFCountMerge(col 3:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3]
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3]
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
- Reducer 3
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 4
- dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT MIN(cbigint) as c1,
- MAX(cbigint),
- COUNT(cbigint),
- COUNT(*)
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT MIN(cbigint) as c1,
- MAX(cbigint),
- COUNT(cbigint),
- COUNT(*)
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
--2147311592 2145498388 9173 12288
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT SUM(cbigint) as c1
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT SUM(cbigint) as c1
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
- Reducer 3 <- Reducer 2 (SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
- Select Operator
- expressions: cbigint (type: bigint)
- outputColumnNames: cbigint
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [3]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(cbigint)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0]
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 12
- includeColumns: [3]
- dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reducer 2
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 1
- dataColumns: VALUE._col0:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0]
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reducer 3
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 1
- dataColumns: KEY.reducesinkkey0:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: bigint)
- outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT SUM(cbigint) as c1
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT SUM(cbigint) as c1
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
--1698460028409
-PREHOOK: query: EXPLAIN VECTORIZATION
-SELECT
- avg(cbigint) as c1,
- variance(cbigint),
- var_pop(cbigint),
- var_samp(cbigint),
- std(cbigint),
- stddev(cbigint),
- stddev_pop(cbigint),
- stddev_samp(cbigint)
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION
-SELECT
- avg(cbigint) as c1,
- variance(cbigint),
- var_pop(cbigint),
- var_samp(cbigint),
- std(cbigint),
- stddev(cbigint),
- stddev_pop(cbigint),
- stddev_samp(cbigint)
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
- Reducer 3 <- Reducer 2 (SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cbigint (type: bigint)
- outputColumnNames: cbigint
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reducer 2
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reduce Operator Tree:
- Group By Operator
- aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- sort order: +
- Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
- Reducer 3
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT
- avg(cbigint) as c1,
- variance(cbigint),
- var_pop(cbigint),
- var_samp(cbigint),
- std(cbigint),
- stddev(cbigint),
- stddev_pop(cbigint),
- stddev_samp(cbigint)
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT
- avg(cbigint) as c1,
- variance(cbigint),
- var_pop(cbigint),
- var_samp(cbigint),
- std(cbigint),
- stddev(cbigint),
- stddev_pop(cbigint),
- stddev_samp(cbigint)
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
--1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT MIN(cfloat) as c1,
- MAX(cfloat),
- COUNT(cfloat),
- COUNT(*)
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT MIN(cfloat) as c1,
- MAX(cfloat),
- COUNT(cfloat),
- COUNT(*)
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
- Reducer 3 <- Reducer 2 (SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
- Select Operator
- expressions: cfloat (type: float)
- outputColumnNames: cfloat
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [4]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(cfloat), max(cfloat), count(cfloat), count()
- Group By Vectorization:
- aggregators: VectorUDAFMinDouble(col 4:float) -> float, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3]
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3]
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 12
- includeColumns: [4]
- dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reducer 2
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 4
- dataColumns: VALUE._col0:float, VALUE._col1:float, VALUE._col2:bigint, VALUE._col3:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
- Group By Vectorization:
- aggregators: VectorUDAFMinDouble(col 0:float) -> float, VectorUDAFMaxDouble(col 1:float) -> float, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFCountMerge(col 3:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3]
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: float)
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3]
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
- Reducer 3
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 4
- dataColumns: KEY.reducesinkkey0:float, VALUE._col0:float, VALUE._col1:bigint, VALUE._col2:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT MIN(cfloat) as c1,
- MAX(cfloat),
- COUNT(cfloat),
- COUNT(*)
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT MIN(cfloat) as c1,
- MAX(cfloat),
- COUNT(cfloat),
- COUNT(*)
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
--64.0 79.553 9173 12288
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT SUM(cfloat) as c1
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT SUM(cfloat) as c1
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
- Reducer 3 <- Reducer 2 (SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
- Select Operator
- expressions: cfloat (type: float)
- outputColumnNames: cfloat
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [4]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(cfloat)
- Group By Vectorization:
- aggregators: VectorUDAFSumDouble(col 4:float) -> double
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0]
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: double)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 12
- includeColumns: [4]
- dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reducer 2
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 1
- dataColumns: VALUE._col0:double
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDouble(col 0:double) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0]
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reducer 3
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 1
- dataColumns: KEY.reducesinkkey0:double
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: double)
- outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT SUM(cfloat) as c1
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT SUM(cfloat) as c1
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
--39479.635992884636
-PREHOOK: query: EXPLAIN VECTORIZATION
-SELECT
- avg(cfloat) as c1,
- variance(cfloat),
- var_pop(cfloat),
- var_samp(cfloat),
- std(cfloat),
- stddev(cfloat),
- stddev_pop(cfloat),
- stddev_samp(cfloat)
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION
-SELECT
- avg(cfloat) as c1,
- variance(cfloat),
- var_pop(cfloat),
- var_samp(cfloat),
- std(cfloat),
- stddev(cfloat),
- stddev_pop(cfloat),
- stddev_samp(cfloat)
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
- Reducer 3 <- Reducer 2 (SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cfloat (type: float)
- outputColumnNames: cfloat
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<count:bigint,sum:double,input:float>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reducer 2
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reduce Operator Tree:
- Group By Operator
- aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- sort order: +
- Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
- Reducer 3
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT
- avg(cfloat) as c1,
- variance(cfloat),
- var_pop(cfloat),
- var_samp(cfloat),
- std(cfloat),
- stddev(cfloat),
- stddev_pop(cfloat),
- stddev_samp(cfloat)
-FROM alltypesparquet
-ORDER BY c1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT
- avg(cfloat) as c1,
- variance(cfloat),
- var_pop(cfloat),
- var_samp(cfloat),
- std(cfloat),
- stddev(cfloat),
- stddev_pop(cfloat),
- stddev_samp(cfloat)
-FROM alltypesparquet
-ORDER BY c1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
--4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666
-WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT AVG(cbigint),
- (-(AVG(cbigint))),
- (-6432 + AVG(cbigint)),
- STDDEV_POP(cbigint),
- (-((-6432 + AVG(cbigint)))),
- ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
- VAR_SAMP(cbigint),
- (-((-6432 + AVG(cbigint)))),
- (-6432 + (-((-6432 + AVG(cbigint))))),
- (-((-6432 + AVG(cbigint)))),
- ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
- COUNT(*),
- SUM(cfloat),
- (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
- (-(VAR_SAMP(cbigint))),
- ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
- MIN(ctinyint),
- (-(MIN(ctinyint)))
-FROM alltypesparquet
-WHERE (((cstring2 LIKE '%b%')
- OR ((79.553 != cint)
- OR (cbigint < cdouble)))
- OR ((ctinyint >= csmallint)
- AND ((cboolean2 = 1)
- AND (3569 = ctinyint))))
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
-SELECT AVG(cbigint),
- (-(AVG(cbigint))),
- (-6432 + AVG(cbigint)),
- STDDEV_POP(cbigint),
- (-((-6432 + AVG(cbigint)))),
- ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
- VAR_SAMP(cbigint),
- (-((-6432 + AVG(cbigint)))),
- (-6432 + (-((-6432 + AVG(cbigint))))),
- (-((-6432 + AVG(cbigint)))),
- ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
- COUNT(*),
- SUM(cfloat),
- (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
- (-(VAR_SAMP(cbigint))),
- ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
- MIN(ctinyint),
- (-(MIN(ctinyint)))
-FROM alltypesparquet
-WHERE (((cstring2 LIKE '%b%')
- OR ((79.553 != cint)
- OR (cbigint < cdouble)))
- OR ((ctinyint >= csmallint)
- AND ((cboolean2 = 1)
- AND (3569 = ctinyint))))
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 13:decimal(13,3))(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterLongColEqualLongScalar(col 11:boolean, val 1), FilterLongScalarEqualLongColumn(val 3569, col 0:int)(children: col 0:tinyint)))
- predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint))) or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ctinyint (type: tinyint), cbigint (type: bigint), cfloat (type: float)
- outputColumnNames: ctinyint, cbigint, cfloat
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 3, 4]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint)
- Group By Vectorization:
- aggregators: VectorUDAFAvgLong(col 3:bigint) -> struct<count:bigint,sum:double,input:bigint>, VectorUDAFVarLong(col 3:bigint) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, VectorUDAFVarLong(col 3:bigint) -> struct<count:bigint,sum:double,variance:double> aggregation: var_samp, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4, 5]
- Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 12
- includeColumns: [0, 1, 2, 3, 4, 5, 7, 11]
- dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
- partitionColumnCount: 0
- scratchColumnTypeNames: [decimal(13,3), double]
- Reducer 2
- Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:bigint>, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:double, VALUE._col5:tinyint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Operator
- aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:bigint>) -> double, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFMinLong(col 5:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0 + _col0) (type: double), _col1 (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) + (-6432.0 + _col0)) (type: double), _col2 (type: double), (- (-6432.0 + _col0)) (type: double), (-6432.0 + (- (-6432.0 + _col0))) (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) / (- (-6432.0 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 7, 1, 9, 11, 2, 10, 8, 13, 12, 3, 4, 14, 15, 18, 5, 19]
- selectExpressions: DoubleColUnaryMinus(col 0:double) -> 6:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 9:double, DoubleColAddDoubleColumn(col 10:double, col 8:double)(children: DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 11:double, DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 12:double)(children: DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 12:double) -> 8:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double)
-> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 14:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 15:double) -> 12:double, DoubleColModuloDoubleColumn(col 2:double, col 1:double) -> 14:double, DoubleColUnaryMinus(col 2:double) -> 15:double, DoubleColMultiplyDoubleColumn(col 17:double, col 16:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 0:double) -> 16:double) -> 18:double, LongColUnaryMinus(col 5:tinyint) -> 19:tinyint
- Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: SELECT AVG(cbigint),
- (-(AVG(cbigint))),
- (-6432 + AVG(cbigint)),
- STDDEV_POP(cbigint),
- (-((-6432 + AVG(cbigint)))),
- ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
- VAR_SAMP(cbigint),
- (-((-6432 + AVG(cbigint)))),
- (-6432 + (-((-6432 + AVG(cbigint))))),
- (-((-6432 + AVG(cbigint)))),
- ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
- COUNT(*),
- SUM(cfloat),
- (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
- (-(VAR_SAMP(cbigint))),
- ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
- MIN(ctinyint),
- (-(MIN(ctinyint)))
-FROM alltypesparquet
-WHERE (((cstring2 LIKE '%b%')
- OR ((79.553 != cint)
- OR (cbigint < cdouble)))
- OR ((ctinyint >= csmallint)
- AND ((cboolean2 = 1)
- AND (3569 = ctinyint))))
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesparquet
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT AVG(cbigint),
- (-(AVG(cbigint))),
- (-6432 + AVG(cbigint)),
- STDDEV_POP(cbigint),
- (-((-6432 + AVG
<TRUNCATED>