Posted to commits@hive.apache.org by vi...@apache.org on 2017/11/29 18:01:16 UTC
[02/29] hive git commit: HIVE-17528: Add more q-tests for Hive-on-Spark with Parquet vectorized reader (Ferdinand Xu, reviewed by Vihang Karajgaonkar)
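
The first golden file below exercises division-by-zero under the vectorized Parquet reader. Hive evaluates x / 0 as NULL rather than raising an error, which is why every row of the first query's result is NULL. A minimal sketch of the statements the q-test drives, assuming a session with vectorization enabled (alltypesparquet is the standard q-test table):

    SET hive.vectorized.execution.enabled=true;
    SET hive.execution.engine=spark;
    -- x / 0.0 evaluates to NULL in Hive, never an error
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT cdouble / 0.0 FROM alltypesparquet LIMIT 100;
    SELECT cdouble / 0.0 FROM alltypesparquet LIMIT 100;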
http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
new file mode 100644
index 0000000..5ba7587
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
@@ -0,0 +1,608 @@
+PREHOOK: query: explain vectorization expression
+select cdouble / 0.0 from alltypesparquet limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+select cdouble / 0.0 from alltypesparquet limit 100
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesparquet
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: (cdouble / 0.0) (type: double)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [13]
+ selectExpressions: DoubleColDivideDoubleScalar(col 5:double, val 0.0) -> 13:double
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select cdouble / 0.0 from alltypesparquet limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: select cdouble / 0.0 from alltypesparquet limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+PREHOOK: query: explain vectorization expression
+select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L)
+from alltypesparquet where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L)
+from alltypesparquet where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesparquet
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val 0), FilterLongColLessLongScalar(col 3:bigint, val 100000000))
+ predicate: ((cbigint < 100000000) and (cbigint > 0)) (type: boolean)
+ Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21))
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [13, 16, 18]
+ selectExpressions: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 13:bigint, DoubleColDivideDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 15:double) -> 16:double, DecimalScalarDivideDecimalColumn(val 1.2, col 17:decimal(19,0))(children: CastLongToDecimal(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 17:decimal(19,0)) -> 18:decimal(22,21)
+ Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint), _col1 (type: double)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: decimal(22,21))
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21))
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
+ Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L)
+from alltypesparquet where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L)
+from alltypesparquet where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-985319 NULL -0.000001217879691754650
+-985319 2.0297994862577501E-4 -0.000001217879691754650
+-63925 0.11256941728588189 -0.000018771998435666797
+0 NULL NULL
+0 NULL NULL
+0 NULL NULL
+0 NULL NULL
+0 NULL NULL
+0 NULL NULL
+0 NULL NULL
+0 NULL NULL
+0 NULL NULL
+392309 NULL 0.000003058813333367320
+673083 -0.010691103474608629 0.000001782841046349410
+2331159 NULL 0.000000514765402102559
+2342037 NULL 0.000000512374484263058
+3533105 -5.660743170667161E-5 0.000000339644590240030
+3768727 0.004139594085748318 0.000000318409903397089
+4728619 NULL 0.000000253773881972728
+5391403 NULL 0.000000222576572369010
+7022666 -0.0010246820794268159 0.000000170875277280736
+7470430 NULL 0.000000160633323650714
+8276429 NULL 0.000000144990067576246
+8286860 -8.683626850218298E-4 0.000000144807562816314
+8299981 -8.669899364829872E-4 0.000000144578644216174
+9247593 NULL 0.000000129763496295739
+9821695 -7.326637611939691E-4 0.000000122178503812224
+10000738 0.001559984873116364 0.000000119991144653525
+10081828 0.0015474376273826532 0.000000119026033770860
+10745355 -6.696847149303117E-4 0.000000111676161466978
+11127199 -1.797397530142132E-5 0.000000107843851808528
+11722580 NULL 0.000000102366543883684
+12649396 NULL 0.000000094866189658384
+13126214 -1.5236685917203544E-5 0.000000091420115503221
+14042667 NULL 0.000000085453852889910
+14943972 -1.3383322720358416E-5 0.000000080299936322150
+16259022 NULL 0.000000073805177211766
+16531556 -1.2098074736582569E-5 0.000000072588448419495
+16596157 NULL 0.000000072305895876979
+17058489 -1.1724367849930905E-5 0.000000070346207099585
+17247320 -4.172242412154468E-4 0.000000069576026884177
+19004427 8.209139901981786E-4 0.000000063143182375349
+19498517 NULL 0.000000061543141973310
+20165679 7.736411950224934E-4 0.000000059507046601307
+20547875 NULL 0.000000058400199534015
+23264783 NULL 0.000000051580107151655
+23475527 6.645644206411213E-4 0.000000051117063314489
+24379905 NULL 0.000000049220864478348
+24514624 -2.935390728407664E-4 0.000000048950373458716
+25154198 -2.860755091456305E-4 0.000000047705754721339
+25245192 -7.922300610745999E-6 0.000000047533803664476
+26610943 NULL 0.000000045094230595286
+27520143 5.668938566198584E-4 0.000000043604424584567
+27818379 NULL 0.000000043136949137115
+28400244 NULL 0.000000042253158106670
+28698999 5.43607810153936E-4 0.000000041813305056389
+28806400 -6.9429015774272385E-6 0.000000041657409464563
+29920877 5.214085135271938E-4 0.000000040105776311303
+33126539 NULL 0.000000036224732079617
+34603086 NULL 0.000000034678987879867
+35156265 NULL 0.000000034133318769784
+35862260 NULL 0.000000033461360215447
+36123797 -1.992038655294182E-4 0.000000033219099310075
+36341671 -1.980096072082101E-4 0.000000033019945615599
+36413215 -5.4925114412446145E-6 0.000000032955068647468
+36578596 4.2650625518814335E-4 0.000000032806070522772
+36796441 -1.955623914823719E-4 0.000000032611849607955
+39723587 NULL 0.000000030208752296211
+39985709 -1.7996429674411925E-4 0.000000030010722080731
+40018606 NULL 0.000000029986051987918
+41003161 NULL 0.000000029266036342905
+41158231 3.790493328053871E-4 0.000000029155772025285
+41848817 NULL 0.000000028674645689507
+44047567 -1.633688416888043E-4 0.000000027243275434487
+45125678 NULL 0.000000026592398234992
+45180154 NULL 0.000000026560334433566
+45717793 3.4124569399052136E-4 0.000000026247986205283
+46163162 NULL 0.000000025994753132379
+46525838 3.353190543284787E-4 0.000000025792120068853
+48626663 NULL 0.000000024677819244969
+49102701 -1.465499830650864E-4 0.000000024438574163161
+50300445 -1.4306036457530346E-4 0.000000023856647789100
+50929325 -1.412938420055636E-4 0.000000023562063702984
+52422534 -1.3726921327381848E-4 0.000000022890919389742
+52667422 2.9621727070673783E-4 0.000000022784483356713
+52962061 2.945693522010029E-4 0.000000022657728520044
+53695172 NULL 0.000000022348377988248
+54760317 NULL 0.000000021913678841560
+55020655 2.835480602693661E-4 0.000000021809991175132
+56102034 NULL 0.000000021389598815615
+56131313 NULL 0.000000021378441655195
+56838351 -3.5187509222426247E-6 0.000000021112505533456
+56997841 -3.5089048372902406E-6 0.000000021053429023741
+57778807 -1.2454393528755274E-4 0.000000020768860803928
+58080381 NULL 0.000000020661021490200
+58307527 NULL 0.000000020580533281749
+58536385 -1.2293208745295768E-4 0.000000020500070170032
+59347745 NULL 0.000000020219807846111
+60229567 NULL 0.000000019923769334088
+60330397 NULL 0.000000019890470801974
+PREHOOK: query: explain vectorization expression
+select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0)
+from alltypesparquet where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0)
+from alltypesparquet where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesparquet
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -500.0), FilterDoubleColLessDoubleScalar(col 5:double, val -199.0))
+ predicate: ((cdouble < -199.0) and (cdouble >= -500.0)) (type: boolean)
+ Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [13, 16, 17, 15, 18]
+ selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 16:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 17:double, DoubleScalarDivideDoubleColumn(val 3.0, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 15:double, DoubleScalarDivideDoubleColumn(val 1.2, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 18:double
+ Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double), _col1 (type: double)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 1, 3, 4]
+ Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0)
+from alltypesparquet where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0)
+from alltypesparquet where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-292.0 NULL 1.0 NULL -0.010273972602739725 -0.00410958904109589
+-290.0 NULL 1.0 NULL -0.010344827586206896 -0.004137931034482759
+-289.0 NULL 1.0 NULL -0.010380622837370242 -0.004152249134948096
+-281.0 NULL 1.0 NULL -0.010676156583629894 -0.004270462633451957
+-279.0 NULL 1.0 NULL -0.010752688172043012 -0.004301075268817204
+-274.0 6888911.518248175 1.0 6888911.518248175 -0.010948905109489052 -0.00437956204379562
+-273.0 6028764.868131869 1.0 6028764.868131869 -0.01098901098901099 -0.004395604395604396
+-257.0 6404096.53307393 1.0 6404096.53307393 -0.011673151750972763 -0.004669260700389105
+-250.0 6583411.236 1.0 6583411.236 -0.012 -0.0048
+-247.0 NULL 1.0 NULL -0.012145748987854251 -0.004858299595141701
+-247.0 -7546669.174089069 1.0 -7546669.174089069 -0.012145748987854251 -0.004858299595141701
+-246.0 NULL 1.0 NULL -0.012195121951219513 -0.004878048780487805
+-237.0 NULL 1.0 NULL -0.012658227848101266 -0.005063291139240506
+-236.0 NULL 1.0 NULL -0.012711864406779662 -0.005084745762711864
+-229.0 7187130.170305677 1.0 7187130.170305677 -0.013100436681222707 -0.005240174672489083
+-228.0 8278779.631578947 1.0 8278779.631578947 -0.013157894736842105 -0.005263157894736842
+-225.0 NULL 1.0 NULL -0.013333333333333334 -0.005333333333333333
+-210.0 -8876320.40952381 1.0 -8876320.40952381 -0.014285714285714285 -0.005714285714285714
+-201.0 NULL 1.0 NULL -0.014925373134328358 -0.005970149253731343
+-199.0 NULL 1.0 NULL -0.01507537688442211 -0.006030150753768844
+-189.0 NULL 1.0 NULL -0.015873015873015872 -0.006349206349206349
+-188.0 NULL 1.0 NULL -0.015957446808510637 -0.006382978723404255
+-184.0 8944852.222826088 1.0 8944852.222826088 -0.016304347826086956 -0.006521739130434782
+-183.0 8993731.196721312 1.0 8993731.196721312 -0.01639344262295082 -0.006557377049180328
+-181.0 NULL 1.0 NULL -0.016574585635359115 -0.0066298342541436465
+-179.0 NULL 1.0 NULL -0.01675977653631285 -0.0067039106145251395
+-169.0 9738774.01775148 1.0 9738774.01775148 -0.01775147928994083 -0.007100591715976331
+-164.0 NULL 1.0 NULL -0.018292682926829267 -0.007317073170731707
+-161.0 NULL 1.0 NULL -0.018633540372670808 -0.007453416149068323
+-154.0 1.2256894519480519E7 1.0 1.2256894519480519E7 -0.01948051948051948 -0.007792207792207792
+-152.0 NULL 1.0 NULL -0.019736842105263157 -0.007894736842105263
+-148.0 NULL 1.0 NULL -0.02027027027027027 -0.008108108108108109
+-140.0 NULL 1.0 NULL -0.02142857142857143 -0.008571428571428572
+-138.0 NULL 1.0 NULL -0.021739130434782608 -0.008695652173913044
+-137.0 NULL 1.0 NULL -0.021897810218978103 -0.00875912408759124
+-132.0 NULL 1.0 NULL -0.022727272727272728 -0.00909090909090909
+-129.0 1.2758548906976745E7 1.0 1.2758548906976745E7 -0.023255813953488372 -0.009302325581395349
+-128.0 NULL 1.0 NULL -0.0234375 -0.009375
+-126.0 NULL 1.0 NULL -0.023809523809523808 -0.009523809523809523
+-126.0 -1.4793867349206349E7 1.0 -1.4793867349206349E7 -0.023809523809523808 -0.009523809523809523
+-116.0 NULL 1.0 NULL -0.02586206896551724 -0.010344827586206896
+-113.0 NULL 1.0 NULL -0.02654867256637168 -0.010619469026548672
+-113.0 -1.6495816690265486E7 1.0 -1.6495816690265486E7 -0.02654867256637168 -0.010619469026548672
+-96.0 NULL 1.0 NULL -0.03125 -0.012499999999999999
+-94.0 -1.9830077510638297E7 1.0 -1.9830077510638297E7 -0.031914893617021274 -0.01276595744680851
+-93.0 NULL 1.0 NULL -0.03225806451612903 -0.012903225806451613
+-77.0 2.4513789038961038E7 1.0 2.4513789038961038E7 -0.03896103896103896 -0.015584415584415584
+-69.0 2.735596747826087E7 1.0 2.735596747826087E7 -0.043478260869565216 -0.017391304347826087
+-62.0 NULL 1.0 NULL -0.04838709677419355 -0.01935483870967742
+-62.0 3.0444544451612905E7 1.0 3.0444544451612905E7 -0.04838709677419355 -0.01935483870967742
+-60.0 NULL 1.0 NULL -0.05 -0.02
+-57.0 -3.27022330877193E7 1.0 -3.27022330877193E7 -0.05263157894736842 -0.021052631578947368
+-49.0 3.35888328367347E7 1.0 3.35888328367347E7 -0.061224489795918366 -0.024489795918367346
+-46.0 3.577940889130435E7 1.0 3.577940889130435E7 -0.06521739130434782 -0.02608695652173913
+-38.0 4.3311916026315786E7 1.0 4.3311916026315786E7 -0.07894736842105263 -0.031578947368421054
+-28.0 5.878045746428572E7 1.0 5.878045746428572E7 -0.10714285714285714 -0.04285714285714286
+-28.0 6.741291985714285E7 1.0 6.741291985714285E7 -0.10714285714285714 -0.04285714285714286
+-21.0 8.988389314285715E7 1.0 8.988389314285715E7 -0.14285714285714285 -0.05714285714285714
+-20.0 NULL 1.0 NULL -0.15 -0.06
+-17.0 NULL 1.0 NULL -0.17647058823529413 -0.07058823529411765
+-12.0 -1.5533560716666666E8 1.0 -1.5533560716666666E8 -0.25 -0.09999999999999999
+-3.0 NULL 1.0 NULL -1.0 -0.39999999999999997
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
+0.0 NULL NULL NULL NULL NULL
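
The next file covers LIMIT handling and opens with Hive's precision warning: comparing a bigint to a double routes the bigint through UDFToDouble, and a double's 53-bit significand cannot represent every 64-bit integer, so values beyond 2^53 may compare as equal when they are not. A hedged illustration (the literals are hypothetical, chosen only because 2^53 and 2^53 + 1 collapse to the same double):

    -- distinct as bigint, identical once cast to double -> the comparison loses precision
    SELECT CAST(9007199254740992 AS double) = CAST(9007199254740993 AS double);
    -- hence the planner warning on predicates such as: cbigint < cdouble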
http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
new file mode 100644
index 0000000..ca696ab
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
@@ -0,0 +1,932 @@
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesparquet
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
+ Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint), cdouble (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 7
+ Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 7
+ Processor Tree:
+ ListSink
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-1887561756 -10011.0
+-1887561756 -13877.0
+-1887561756 -2281.0
+-1887561756 -8881.0
+-1887561756 10361.0
+-1887561756 1839.0
+-1887561756 9531.0
+PREHOOK: query: explain vectorization detail
+select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesparquet
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 5, 1]
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: double)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 5]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [1]
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: smallint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 1, 5]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double, VALUE._col0:smallint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-64 -10462.0 -10462
+-64 -15920.0 -15920
+-64 -1600.0 -1600
+-64 -200.0 -200
+-64 -2919.0 -2919
+-64 -3097.0 -3097
+-64 -3586.0 -3586
+-64 -4018.0 -4018
+-64 -4040.0 -4040
+-64 -4803.0 -4803
+-64 -6907.0 -6907
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -8080.0 -8080
+-64 -9842.0 -9842
+PREHOOK: query: explain vectorization detail
+select ctinyint,avg(cdouble + 1) from alltypesparquet group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select ctinyint,avg(cdouble + 1) from alltypesparquet group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesparquet
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: ctinyint (type: tinyint), (cdouble + 1.0) (type: double)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 13]
+ selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFAvgDouble(col 13:double) -> struct<count:bigint,sum:double,input:double>
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:tinyint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [0]
+ valueColumnNums: [1]
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col1 (type: struct<count:bigint,sum:double,input:double>)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 5]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [double]
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY._col0:tinyint, VALUE._col0:struct<count:bigint,sum:double,input:double>
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFAvgFinal(col 1:struct<count:bigint,sum:double,input:double>) -> double
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:tinyint
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesparquet group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesparquet group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-46 3033.55
+-47 -574.6428571428571
+-48 1672.909090909091
+-49 768.7659574468086
+-50 -960.0192307692307
+-51 -96.46341463414635
+-52 2810.705882352941
+-53 -532.7567567567568
+-54 2712.7272727272725
+-55 2385.595744680851
+-56 2595.818181818182
+-57 1867.0535714285713
+-58 3483.2444444444445
+-59 318.27272727272725
+-60 1071.82
+-61 914.3404255319149
+-62 245.69387755102042
+-63 2178.7272727272725
+-64 373.52941176470586
+NULL 9370.0945309795
+PREHOOK: query: explain vectorization detail
+select distinct(ctinyint) from alltypesparquet limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select distinct(ctinyint) from alltypesparquet limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesparquet
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: ctinyint
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:tinyint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: ctinyint (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY._col0:tinyint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:tinyint
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct(ctinyint) from alltypesparquet limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct(ctinyint) from alltypesparquet limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-46
+-47
+-48
+-49
+-50
+-51
+-52
+-53
+-54
+-55
+-56
+-57
+-58
+-59
+-60
+-61
+-62
+-63
+-64
+NULL
+PREHOOK: query: explain vectorization detail
+select ctinyint, count(distinct(cdouble)) from alltypesparquet group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select ctinyint, count(distinct(cdouble)) from alltypesparquet group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesparquet
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: ctinyint (type: tinyint), cdouble (type: double)
+ outputColumnNames: ctinyint, cdouble
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 5]
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:tinyint, col 5:double
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: ctinyint (type: tinyint), cdouble (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: double)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [0]
+ valueColumnNums: []
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 5]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY._col0:tinyint, KEY._col1:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:tinyint, col 1:double
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(col 1:double) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: COMPLETE
+ keyExpressions: col 0:tinyint
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: [0]
+ keys: _col0 (type: tinyint)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesparquet group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesparquet group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-46 24
+-47 22
+-48 29
+-49 26
+-50 30
+-51 21
+-52 33
+-53 22
+-54 26
+-55 29
+-56 36
+-57 35
+-58 23
+-59 31
+-60 27
+-61 25
+-62 27
+-63 19
+-64 24
+NULL 2932
+PREHOOK: query: explain vectorization detail
+select ctinyint,cdouble from alltypesparquet order by ctinyint limit 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select ctinyint,cdouble from alltypesparquet order by ctinyint limit 0
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 0
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint,cdouble from alltypesparquet order by ctinyint limit 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,cdouble from alltypesparquet order by ctinyint limit 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+PREHOOK: query: explain vectorization detail
+select cdouble, sum(ctinyint) as sum from alltypesparquet where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select cdouble, sum(ctinyint) as sum from alltypesparquet where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesparquet
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(ctinyint)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 5:double
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ keys: cdouble (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [1]
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 5]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY._col0:double, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:double
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: bigint), _col0 (type: double)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [1, 0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ Reducer 3
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 0]
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesparquet where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesparquet where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-10462.0 -64
+-1121.0 -89
+-11322.0 -101
+-11492.0 -78
+-15920.0 -64
+-4803.0 -64
+-6907.0 -64
+-7196.0 -2009
+-8080.0 -64
+-8118.0 -80
+-9842.0 -64
+10496.0 -67
+15601.0 -1733
+3520.0 -86
+4811.0 -115
+5241.0 -80
+557.0 -75
+7705.0 -88
+9452.0 -76
+NULL -32768
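
To reproduce the plan above outside the q-test harness, a minimal sketch follows.
The .q file itself is not part of this hunk, so the sketch sets only the properties
that the plan's own vectorization conditions name (hive.vectorized.execution.enabled,
hive.vectorized.execution.reduce.enabled, hive.vectorized.execution.reducesink.new.enabled,
hive.execution.engine); everything else is assumed to be harness defaults.

    -- sketch, not part of the golden file; only properties named in the plan are set
    set hive.execution.engine=spark;
    set hive.vectorized.execution.enabled=true;
    set hive.vectorized.execution.reduce.enabled=true;
    set hive.vectorized.execution.reducesink.new.enabled=true;

    explain vectorization expression
    select cdouble, sum(ctinyint) as sum
    from alltypesparquet
    where ctinyint is not null
    group by cdouble
    order by sum, cdouble
    limit 20;

The two-reducer shape (Reducer 2 <- Map 1 (GROUP, 2), Reducer 3 <- Reducer 2 (SORT, 1))
follows from the query: the map-side hash group-by is merged in Reducer 2
(vectorProcessingMode: MERGE_PARTIAL), and the ORDER BY ... LIMIT 20 runs on the single
SORT reducer, with TopN Hash Memory Usage: 0.3 letting the shuffle drop rows that
cannot reach the top 20.
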
http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_nested_udf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_nested_udf.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_nested_udf.q.out
new file mode 100644
index 0000000..acac581
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_nested_udf.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesparquet
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesparquet
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+261468
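
The nested_udf golden file records only the query result, not a plan. To inspect how
the nested call is vectorized, the same explain form used by the other tests in this
commit applies; a sketch, assuming the harness defaults:

    -- sketch, assuming the same settings as the other tests in this commit
    explain vectorization expression
    SELECT SUM(abs(ctinyint)) from alltypesparquet;

The point of interest is the nesting itself: whether the scalar abs(ctinyint) feeding
SUM stays on the vectorized path.
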
http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_not.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_not.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_not.q.out
new file mode 100644
index 0000000..e581007
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_not.q.out
@@ -0,0 +1,58 @@
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT AVG(cbigint),
+ (-(AVG(cbigint))),
+ (-6432 + AVG(cbigint)),
+ STDDEV_POP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+ VAR_SAMP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ (-6432 + (-((-6432 + AVG(cbigint))))),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+ COUNT(*),
+ SUM(cfloat),
+ (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+ (-(VAR_SAMP(cbigint))),
+ ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+ MIN(ctinyint),
+ (-(MIN(ctinyint)))
+FROM alltypesparquet
+WHERE (((cstring2 LIKE '%b%')
+ OR ((79.553 != cint)
+ OR (NOT(cbigint >= cdouble))))
+ OR ((ctinyint >= csmallint)
+ AND (NOT ((cboolean2 != 1)
+ OR (3569 != ctinyint)))))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT AVG(cbigint),
+ (-(AVG(cbigint))),
+ (-6432 + AVG(cbigint)),
+ STDDEV_POP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+ VAR_SAMP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ (-6432 + (-((-6432 + AVG(cbigint))))),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+ COUNT(*),
+ SUM(cfloat),
+ (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+ (-(VAR_SAMP(cbigint))),
+ ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+ MIN(ctinyint),
+ (-(MIN(ctinyint)))
+FROM alltypesparquet
+WHERE (((cstring2 LIKE '%b%')
+ OR ((79.553 != cint)
+ OR (NOT(cbigint >= cdouble))))
+ OR ((ctinyint >= csmallint)
+ AND (NOT ((cboolean2 != 1)
+ OR (3569 != ctinyint)))))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64
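
Two entries in the wide result row above are algebraically fixed, which makes them
handy anchors when scanning the scientific-notation output: writing a for
(-6432 + AVG(cbigint)), the query projects both ((-a) + a) and ((-a) / (-a)), which
must evaluate to 0.0 and 1.0, and those are exactly the 6th and 11th values printed.
A standalone check (a sketch, not part of the golden file; the subquery alias t is
introduced here for illustration):

    -- sketch: the identities hold for any non-null, nonzero x
    select (-(x)) + x, (-(x)) / (-(x))
    from (select -6432 + avg(cbigint) as x from alltypesparquet) t;
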
http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
new file mode 100644
index 0000000..e90cff4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
@@ -0,0 +1,184 @@
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 3,2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 3,2
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesparquet
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
+ Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint), cdouble (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 2
+ Offset of rows: 3
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 2
+ Processor Tree:
+ ListSink
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 3,2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 3,2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-1887561756 10361.0
+-1887561756 -8881.0
+PREHOOK: query: explain vectorization expression
+select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 10,3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 10,3
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesparquet
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 5, 1]
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: double)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: smallint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 3
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Offset of rows: 10
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 3
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 10,3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 10,3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -7196.0 -7196
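
Both offset tests above use Hive's LIMIT <offset>,<count> form, which the plans
surface as separate "Offset of rows" and "Number of rows" entries on the Limit
operator. One reading note: the first query (limit 3,2) has no ORDER BY, so which
three rows are skipped is not pinned down by SQL semantics; the golden output is
stable only because the test input and execution layout are fixed. The second query
orders by ctinyint, cdouble first, so the skipped prefix is well defined, and the
map side can prune with a TopN hash (TopN Hash Memory Usage: 0.1), since only the
first offset + count = 13 ordered rows can survive the limit. The deterministic
form, as exercised above:

    -- skip the first 10 ordered rows, return the next 3
    select ctinyint, cdouble, csmallint
    from alltypesparquet
    where ctinyint is not null
    order by ctinyint, cdouble
    limit 10,3;
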