You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vi...@apache.org on 2017/11/29 18:01:34 UTC
[20/29] hive git commit: HIVE-17528 : Add more q-tests for
Hive-on-Spark with Parquet vectorized reader (Ferdinand Xu,
reviewed by Vihang Karajgaonkar)
http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out
new file mode 100644
index 0000000..b445dfb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out
@@ -0,0 +1,181 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT MAX(csmallint),
+ (MAX(csmallint) * -75),
+ COUNT(*),
+ ((MAX(csmallint) * -75) / COUNT(*)),
+ (6981 * MAX(csmallint)),
+ MIN(csmallint),
+ (-(MIN(csmallint))),
+ (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+ SUM(cint),
+ MAX(ctinyint),
+ (-(MAX(ctinyint))),
+ ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM alltypesparquet
+WHERE (((cboolean2 IS NOT NULL)
+ AND (cstring1 LIKE '%b%'))
+ OR ((ctinyint = cdouble)
+ AND ((ctimestamp2 IS NOT NULL)
+ AND (cstring2 LIKE 'a'))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT MAX(csmallint),
+ (MAX(csmallint) * -75),
+ COUNT(*),
+ ((MAX(csmallint) * -75) / COUNT(*)),
+ (6981 * MAX(csmallint)),
+ MIN(csmallint),
+ (-(MIN(csmallint))),
+ (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+ SUM(cint),
+ MAX(ctinyint),
+ (-(MAX(ctinyint))),
+ ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM alltypesparquet
+WHERE (((cboolean2 IS NOT NULL)
+ AND (cstring1 LIKE '%b%'))
+ OR ((ctinyint = cdouble)
+ AND ((ctimestamp2 IS NOT NULL)
+ AND (cstring2 LIKE 'a'))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesparquet
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 11:boolean), FilterStringColLikeStringScalar(col 6:string, pattern %b%)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), SelectColumnIsNotNull(col 9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern a)))
+ predicate: (((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a')) or (cboolean2 is not null and (cstring1 like '%b%'))) (type: boolean)
+ Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int)
+ outputColumnNames: ctinyint, csmallint, cint
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
+ Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint)
+ Group By Vectorization:
+ aggregators: VectorUDAFMaxLong(col 1:smallint) -> smallint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 1:smallint) -> smallint, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFMaxLong(col 0:tinyint) -> tinyint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0, 1, 2, 3, 4]
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 1, 2, 5, 6, 7, 9, 11]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [double]
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), max(VALUE._col4)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: smallint), (UDFToInteger(_col0) * -75) (type: int), _col1 (type: bigint), (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1)) (type: double), (6981 * UDFToInteger(_col0)) (type: int), _col2 (type: smallint), (- _col2) (type: smallint), (197.0 % (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1))) (type: double), _col3 (type: bigint), _col4 (type: tinyint), (- _col4) (type: tinyint), ((- _col4) + _col4) (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT MAX(csmallint),
+ (MAX(csmallint) * -75),
+ COUNT(*),
+ ((MAX(csmallint) * -75) / COUNT(*)),
+ (6981 * MAX(csmallint)),
+ MIN(csmallint),
+ (-(MIN(csmallint))),
+ (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+ SUM(cint),
+ MAX(ctinyint),
+ (-(MAX(ctinyint))),
+ ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM alltypesparquet
+WHERE (((cboolean2 IS NOT NULL)
+ AND (cstring1 LIKE '%b%'))
+ OR ((ctinyint = cdouble)
+ AND ((ctimestamp2 IS NOT NULL)
+ AND (cstring2 LIKE 'a'))))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MAX(csmallint),
+ (MAX(csmallint) * -75),
+ COUNT(*),
+ ((MAX(csmallint) * -75) / COUNT(*)),
+ (6981 * MAX(csmallint)),
+ MIN(csmallint),
+ (-(MIN(csmallint))),
+ (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+ SUM(cint),
+ MAX(ctinyint),
+ (-(MAX(ctinyint))),
+ ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM alltypesparquet
+WHERE (((cboolean2 IS NOT NULL)
+ AND (cstring1 LIKE '%b%'))
+ OR ((ctinyint = cdouble)
+ AND ((ctimestamp2 IS NOT NULL)
+ AND (cstring2 LIKE 'a'))))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+16343 -1225725 1070 -1145.53738317757 114090483 -16307 16307 197.0 -26853917571 11 -11 0