You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vi...@apache.org on 2017/11/29 18:01:34 UTC
[20/29] hive git commit: HIVE-17528 : Add more q-tests for Hive-on-Spark with Parquet vectorized reader (Ferdinand Xu, reviewed by Vihang Karajgaonkar)

http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out
new file mode 100644
index 0000000..b445dfb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out
@@ -0,0 +1,181 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT MAX(csmallint),
+       (MAX(csmallint) * -75),
+       COUNT(*),
+       ((MAX(csmallint) * -75) / COUNT(*)),
+       (6981 * MAX(csmallint)),
+       MIN(csmallint),
+       (-(MIN(csmallint))),
+       (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+       SUM(cint),
+       MAX(ctinyint),
+       (-(MAX(ctinyint))),
+       ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM   alltypesparquet
+WHERE  (((cboolean2 IS NOT NULL)
+         AND (cstring1 LIKE '%b%'))
+        OR ((ctinyint = cdouble)
+            AND ((ctimestamp2 IS NOT NULL)
+                 AND (cstring2 LIKE 'a'))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT MAX(csmallint),
+       (MAX(csmallint) * -75),
+       COUNT(*),
+       ((MAX(csmallint) * -75) / COUNT(*)),
+       (6981 * MAX(csmallint)),
+       MIN(csmallint),
+       (-(MIN(csmallint))),
+       (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+       SUM(cint),
+       MAX(ctinyint),
+       (-(MAX(ctinyint))),
+       ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM   alltypesparquet
+WHERE  (((cboolean2 IS NOT NULL)
+         AND (cstring1 LIKE '%b%'))
+        OR ((ctinyint = cdouble)
+            AND ((ctimestamp2 IS NOT NULL)
+                 AND (cstring2 LIKE 'a'))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesparquet
+            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 11:boolean), FilterStringColLikeStringScalar(col 6:string, pattern %b%)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), SelectColumnIsNotNull(col 9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern a)))
+              predicate: (((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a')) or (cboolean2 is not null and (cstring1 like '%b%'))) (type: boolean)
+              Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int)
+                outputColumnNames: ctinyint, csmallint, cint
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1, 2]
+                Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFMaxLong(col 1:smallint) -> smallint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 1:smallint) -> smallint, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFMaxLong(col 0:tinyint) -> tinyint
+                      className: VectorGroupByOperator
+                      groupByMode: HASH
+                      native: false
+                      vectorProcessingMode: HASH
+                      projectedOutputColumnNums: [0, 1, 2, 3, 4]
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                    Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 1, 2, 5, 6, 7, 9, 11]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: [double]
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), max(VALUE._col4)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: smallint), (UDFToInteger(_col0) * -75) (type: int), _col1 (type: bigint), (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1)) (type: double), (6981 * UDFToInteger(_col0)) (type: int), _col2 (type: smallint), (- _col2) (type: smallint), (197.0 % (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1))) (type: double), _col3 (type: bigint), _col4 (type: tinyint), (- _col4) (type: tinyint), ((- _col4) + _col4) (type: tinyint)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+            Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT MAX(csmallint),
+       (MAX(csmallint) * -75),
+       COUNT(*),
+       ((MAX(csmallint) * -75) / COUNT(*)),
+       (6981 * MAX(csmallint)),
+       MIN(csmallint),
+       (-(MIN(csmallint))),
+       (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+       SUM(cint),
+       MAX(ctinyint),
+       (-(MAX(ctinyint))),
+       ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM   alltypesparquet
+WHERE  (((cboolean2 IS NOT NULL)
+         AND (cstring1 LIKE '%b%'))
+        OR ((ctinyint = cdouble)
+            AND ((ctimestamp2 IS NOT NULL)
+                 AND (cstring2 LIKE 'a'))))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MAX(csmallint),
+       (MAX(csmallint) * -75),
+       COUNT(*),
+       ((MAX(csmallint) * -75) / COUNT(*)),
+       (6981 * MAX(csmallint)),
+       MIN(csmallint),
+       (-(MIN(csmallint))),
+       (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+       SUM(cint),
+       MAX(ctinyint),
+       (-(MAX(ctinyint))),
+       ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM   alltypesparquet
+WHERE  (((cboolean2 IS NOT NULL)
+         AND (cstring1 LIKE '%b%'))
+        OR ((ctinyint = cdouble)
+            AND ((ctimestamp2 IS NOT NULL)
+                 AND (cstring2 LIKE 'a'))))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+16343	-1225725	1070	-1145.53738317757	114090483	-16307	16307	197.0	-26853917571	11	-11	0