Posted to commits@hive.apache.org by se...@apache.org on 2016/10/14 00:19:24 UTC
[03/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)
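
For reference, the golden files below exercise the enhanced syntax this commit introduces. A minimal sketch of invoking it, using the first query in the diff; the SET statement is an assumption inferred from the "enabledConditionsMet: [hive.vectorized.execution.enabled IS true]" lines and is not itself part of the commit:

    -- Hedged sketch: enable vectorization, then request the detailed
    -- per-operator vectorization annotations shown in the hunks below.
    SET hive.vectorized.execution.enabled=true;
    EXPLAIN VECTORIZATION DETAIL
    SELECT t1.v1, t1.a, t2.c, t2.v2
    FROM orc_table_1 t1
    LEFT OUTER JOIN orc_table_2 t2 ON t1.a = t2.c;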
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
index b311c49..d1319b8 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
@@ -62,12 +62,16 @@ POSTHOOK: Input: default@orc_table_2
4 FOUR
NULL <NULL1>
NULL <NULL2>
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-1 depends on stages: Stage-2
@@ -83,15 +87,38 @@ STAGE PLANS:
TableScan
alias: t2
Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: c (type: int), v2 (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 _col1 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: c:int, v2:string
+ partitionColumnCount: 0
Local Work:
Map Reduce Local Work
@@ -104,9 +131,16 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -114,18 +148,45 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [1]
+ bigTableOuterKeyMapping: 1 -> 2
+ bigTableRetainedColumns: [0, 1, 2]
+ bigTableValueColumns: [0, 1]
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [0, 1, 2, 3]
+ smallTableMapping: [3]
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
1 Map 2
Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: v1:string, a:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint, String
Local Work:
Map Reduce Local Work
@@ -155,12 +216,16 @@ one 1 NULL NULL
one 1 NULL NULL
three 3 3 THREE
two 2 2 TWO
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-1 depends on stages: Stage-2
@@ -176,15 +241,38 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 _col1 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: v1:string, a:int
+ partitionColumnCount: 0
Local Work:
Map Reduce Local Work
@@ -197,9 +285,16 @@ STAGE PLANS:
TableScan
alias: t2
Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: c (type: int), v2 (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -207,18 +302,45 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [0]
+ bigTableOuterKeyMapping: 0 -> 3
+ bigTableRetainedColumns: [0, 1, 3]
+ bigTableValueColumns: [0, 1]
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [2, 3, 0, 1]
+ smallTableMapping: [2]
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
0 Map 1
Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: c:int, v2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: String, bigint
Local Work:
Map Reduce Local Work
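
Both plans in vector_outer_join0.q.out now carry a PLAN VECTORIZATION header plus per-operator blocks that name the vector operator class (here VectorMapJoinOuterLongOperator) and list the conditions that made it native. A hedged sketch of probing one such condition; the SET statement is an assumption based on the hive.vectorized.execution.mapjoin.native.enabled entry in nativeConditionsMet, and the resulting plan is not shown anywhere in this commit:

    -- Assumption: flipping the flag named in nativeConditionsMet should
    -- move it to a not-met state and demote the native outer map join;
    -- only the enabled case appears in these golden files.
    SET hive.vectorized.execution.mapjoin.native.enabled=false;
    EXPLAIN VECTORIZATION DETAIL
    SELECT t1.v1, t1.a, t2.c, t2.v2
    FROM orc_table_1 t1
    RIGHT OUTER JOIN orc_table_2 t2 ON t1.a = t2.c;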
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
index 6b89fb3..3a7e27f 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
@@ -216,18 +216,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL
NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select *
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
on cd.cint = c.cint
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select *
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
on cd.cint = c.cint
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-1 depends on stages: Stage-2
@@ -243,15 +247,38 @@ STAGE PLANS:
TableScan
alias: cd
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 _col2 (type: int)
1 _col2 (type: int)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
Local Work:
Map Reduce Local Work
@@ -264,9 +291,16 @@ STAGE PLANS:
TableScan
alias: c
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -274,18 +308,45 @@ STAGE PLANS:
keys:
0 _col2 (type: int)
1 _col2 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [2]
+ bigTableOuterKeyMapping: 2 -> 14
+ bigTableRetainedColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14]
+ bigTableValueColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
+ smallTableMapping: [12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23]
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
input vertices:
1 Map 2
Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint, bigint, bigint, bigint, Double, Double, String, String, Timestamp, Timestamp, bigint, bigint
Local Work:
Map Reduce Local Work
@@ -332,18 +393,22 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL
NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select c.ctinyint
from small_alltypesorc_a c
left outer join small_alltypesorc_a hd
on hd.ctinyint = c.ctinyint
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select c.ctinyint
from small_alltypesorc_a c
left outer join small_alltypesorc_a hd
on hd.ctinyint = c.ctinyint
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-1 depends on stages: Stage-2
@@ -359,15 +424,38 @@ STAGE PLANS:
TableScan
alias: hd
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 _col0 (type: tinyint)
1 _col0 (type: tinyint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
Local Work:
Map Reduce Local Work
@@ -380,9 +468,16 @@ STAGE PLANS:
TableScan
alias: c
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -390,18 +485,42 @@ STAGE PLANS:
keys:
0 _col0 (type: tinyint)
1 _col0 (type: tinyint)
+ Map Join Vectorization:
+ bigTableKeyColumns: [0]
+ bigTableRetainedColumns: [0]
+ bigTableValueColumns: [0]
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [0]
outputColumnNames: _col0
input vertices:
1 Map 2
Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
Local Work:
Map Reduce Local Work
@@ -534,7 +653,7 @@ NULL
NULL
NULL
NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
@@ -543,7 +662,7 @@ left outer join small_alltypesorc_a hd
on hd.ctinyint = c.ctinyint
) t1
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
@@ -552,6 +671,10 @@ left outer join small_alltypesorc_a hd
on hd.ctinyint = c.ctinyint
) t1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-1 depends on stages: Stage-2
@@ -567,15 +690,38 @@ STAGE PLANS:
TableScan
alias: cd
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Select Operator
expressions: cint (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2]
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 _col1 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [2]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
Local Work:
Map Reduce Local Work
Map 4
@@ -583,15 +729,38 @@ STAGE PLANS:
TableScan
alias: hd
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 _col0 (type: tinyint)
1 _col0 (type: tinyint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
Local Work:
Map Reduce Local Work
@@ -606,9 +775,16 @@ STAGE PLANS:
TableScan
alias: c
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Select Operator
expressions: ctinyint (type: tinyint), cint (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 2]
Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -616,6 +792,14 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [2]
+ bigTableRetainedColumns: [0]
+ bigTableValueColumns: [0]
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [0]
outputColumnNames: _col0
input vertices:
1 Map 3
@@ -626,32 +810,84 @@ STAGE PLANS:
keys:
0 _col0 (type: tinyint)
1 _col0 (type: tinyint)
+ Map Join Vectorization:
+ bigTableKeyColumns: [0]
+ bigTableRetainedColumns: [0]
+ bigTableValueColumns: [0]
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [0]
outputColumnNames: _col0
input vertices:
1 Map 4
Statistics: Num rows: 17 Data size: 4843 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(), sum(_col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0, 1]
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint), _col1 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 2]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
Local Work:
Map Reduce Local Work
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: VALUE._col0:bigint, VALUE._col1:bigint
+ partitionColumnCount: 0
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), sum(VALUE._col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0, 1]
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
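
In vector_outer_join1.q.out the aggregate plan additionally tags each Group By Operator with its vector aggregators (VectorUDAFCountStar, VectorUDAFSumLong, VectorUDAFCountMerge) and gives the reducer its own Reduce Vectorization block. The query being explained, reassembled here for reference; the cd join condition is hidden by the hunk context, so "cd.cint = c.cint" is an inference from the Spark HashTable Sink keys, not verbatim source:

    EXPLAIN VECTORIZATION DETAIL
    SELECT count(*), sum(t1.c_ctinyint)
    FROM (SELECT c.ctinyint AS c_ctinyint
          FROM small_alltypesorc_a c
          LEFT OUTER JOIN small_alltypesorc_a cd
            ON cd.cint = c.cint              -- inferred, see note above
          LEFT OUTER JOIN small_alltypesorc_a hd
            ON hd.ctinyint = c.ctinyint) t1;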
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
index 113c7d0..453db4b 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
@@ -226,7 +226,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL
NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
@@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd
on hd.cbigint = c.cbigint
) t1
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
@@ -244,6 +244,10 @@ left outer join small_alltypesorc_a hd
on hd.cbigint = c.cbigint
) t1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-1 depends on stages: Stage-2
@@ -259,15 +263,38 @@ STAGE PLANS:
TableScan
alias: cd
Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Select Operator
expressions: cint (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2]
Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 _col0 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [2]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
Local Work:
Map Reduce Local Work
Map 4
@@ -275,15 +302,38 @@ STAGE PLANS:
TableScan
alias: hd
Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Select Operator
expressions: cbigint (type: bigint)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3]
Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 _col1 (type: bigint)
1 _col0 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [3]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
Local Work:
Map Reduce Local Work
@@ -298,9 +348,16 @@ STAGE PLANS:
TableScan
alias: c
Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Select Operator
expressions: cint (type: int), cbigint (type: bigint)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 3]
Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -308,6 +365,14 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [2]
+ bigTableRetainedColumns: [3]
+ bigTableValueColumns: [3]
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [3]
outputColumnNames: _col1
input vertices:
1 Map 3
@@ -318,32 +383,84 @@ STAGE PLANS:
keys:
0 _col1 (type: bigint)
1 _col0 (type: bigint)
+ Map Join Vectorization:
+ bigTableKeyColumns: [3]
+ bigTableRetainedColumns: [3]
+ bigTableValueColumns: [3]
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [3]
outputColumnNames: _col1
input vertices:
1 Map 4
Statistics: Num rows: 24 Data size: 6336 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(), sum(_col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 3) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0, 1]
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint), _col1 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [2, 3]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
Local Work:
Map Reduce Local Work
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: VALUE._col0:bigint, VALUE._col1:bigint
+ partitionColumnCount: 0
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), sum(VALUE._col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0, 1]
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
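
vector_outer_join2.q.out mirrors the previous file with bigint join keys; its Reducer 2 vertex is gated on hive.vectorized.execution.reduce.enabled, listed under enableConditionsMet. A hedged sketch of the reduce-side switch; this is an assumption inferred from that line, not output shown in this commit:

    -- Assumption: disabling reduce-side vectorization should leave the map
    -- side vectorized while Reducer 2 falls back to row-mode execution.
    SET hive.vectorized.execution.reduce.enabled=false;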
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out
index c5a8de5..fbd294e 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out
@@ -226,7 +226,7 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL
NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
select count(*) from (select c.cstring1
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
@@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd
on hd.cstring1 = c.cstring1
) t1
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
select count(*) from (select c.cstring1
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
@@ -244,117 +244,7 @@ left outer join small_alltypesorc_a hd
on hd.cstring1 = c.cstring1
) t1
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: cd
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
- Map 4
- Map Operator Tree:
- TableScan
- alias: hd
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cstring1 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int), cstring1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1
- input vertices:
- 1 Map 3
- Statistics: Num rows: 22 Data size: 5743 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 4
- Statistics: Num rows: 24 Data size: 6317 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
- Reducer 2
- Execution mode: vectorized
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
PREHOOK: query: -- SORT_QUERY_RESULTS
select count(*) from (select c.cstring1
@@ -380,7 +270,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@small_alltypesorc_a
#### A masked pattern was here ####
20
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
select count(*) from (select c.cstring1
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
@@ -389,7 +279,7 @@ left outer join small_alltypesorc_a hd
on hd.cstring1 = c.cstring1
) t1
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
select count(*) from (select c.cstring1
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
@@ -398,117 +288,7 @@ left outer join small_alltypesorc_a hd
on hd.cstring1 = c.cstring1
) t1
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: cd
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cstring2 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
- Map 4
- Map Operator Tree:
- TableScan
- alias: hd
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cstring1 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cstring1 (type: string), cstring2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0
- input vertices:
- 1 Map 3
- Statistics: Num rows: 22 Data size: 5743 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 4
- Statistics: Num rows: 24 Data size: 6317 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
- Reducer 2
- Execution mode: vectorized
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
PREHOOK: query: -- SORT_QUERY_RESULTS
select count(*) from (select c.cstring1
@@ -534,7 +314,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@small_alltypesorc_a
#### A masked pattern was here ####
28
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
select count(*) from (select c.cstring1
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
@@ -543,7 +323,7 @@ left outer join small_alltypesorc_a hd
on hd.cstring1 = c.cstring1 and hd.cint = c.cint
) t1
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
select count(*) from (select c.cstring1
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
@@ -552,117 +332,7 @@ left outer join small_alltypesorc_a hd
on hd.cstring1 = c.cstring1 and hd.cint = c.cint
) t1
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-2
- Spark
#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: cd
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cbigint (type: bigint), cstring2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col1 (type: bigint), _col3 (type: string)
- 1 _col0 (type: bigint), _col1 (type: string)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
- Map 4
- Map Operator Tree:
- TableScan
- alias: hd
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int), cstring1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int), _col2 (type: string)
- 1 _col0 (type: int), _col1 (type: string)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 20 Data size: 5221 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col1 (type: bigint), _col3 (type: string)
- 1 _col0 (type: bigint), _col1 (type: string)
- outputColumnNames: _col0, _col2
- input vertices:
- 1 Map 3
- Statistics: Num rows: 22 Data size: 5743 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: int), _col2 (type: string)
- 1 _col0 (type: int), _col1 (type: string)
- input vertices:
- 1 Map 4
- Statistics: Num rows: 24 Data size: 6317 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
- Reducer 2
- Execution mode: vectorized
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
PREHOOK: query: -- SORT_QUERY_RESULTS
select count(*) from (select c.cstring1
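
vector_outer_join3.q.out (and vector_outer_join4.q.out below) switch to the FORMATTED variant, so the old text plans are deleted and only the masked-pattern placeholder remains: EXPLAIN ... FORMATTED renders the plan as JSON, which the q.out masking then appears to hide entirely. The form being exercised, using the first query of the file; the cd join condition ("cd.cint = c.cint") is inferred from the hash sink keys in the removed plan text:

    EXPLAIN VECTORIZATION DETAIL FORMATTED
    SELECT count(*)
    FROM (SELECT c.cstring1
          FROM small_alltypesorc_a c
          LEFT OUTER JOIN small_alltypesorc_a cd
            ON cd.cint = c.cint              -- inferred, see note above
          LEFT OUTER JOIN small_alltypesorc_a hd
            ON hd.cstring1 = c.cstring1) t1;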
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
index 94860ab..b9b97f6 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
@@ -246,85 +246,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL
NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
select *
from small_alltypesorc_b c
left outer join small_alltypesorc_b cd
on cd.cint = c.cint
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
select *
from small_alltypesorc_b c
left outer join small_alltypesorc_b cd
on cd.cint = c.cint
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 2
- Map Operator Tree:
- TableScan
- alias: cd
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col2 (type: int)
- 1 _col2 (type: int)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col2 (type: int)
- 1 _col2 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
- input vertices:
- 1 Map 2
- Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
PREHOOK: query: -- SORT_QUERY_RESULTS
select *
@@ -397,85 +331,19 @@ NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL
NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false
NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false
NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
select c.ctinyint
from small_alltypesorc_b c
left outer join small_alltypesorc_b hd
on hd.ctinyint = c.ctinyint
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
select c.ctinyint
from small_alltypesorc_b c
left outer join small_alltypesorc_b hd
on hd.ctinyint = c.ctinyint
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 2
- Map Operator Tree:
- TableScan
- alias: hd
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ctinyint (type: tinyint)
- outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: tinyint)
- 1 _col0 (type: tinyint)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ctinyint (type: tinyint)
- outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: tinyint)
- 1 _col0 (type: tinyint)
- outputColumnNames: _col0
- input vertices:
- 1 Map 2
- Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
PREHOOK: query: -- SORT_QUERY_RESULTS
select c.ctinyint
@@ -904,7 +772,7 @@ NULL
NULL
NULL
NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
select count(*) from (select c.ctinyint
from small_alltypesorc_b c
left outer join small_alltypesorc_b cd
@@ -913,7 +781,7 @@ left outer join small_alltypesorc_b hd
on hd.ctinyint = c.ctinyint
) t1
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
select count(*) from (select c.ctinyint
from small_alltypesorc_b c
left outer join small_alltypesorc_b cd
@@ -922,117 +790,7 @@ left outer join small_alltypesorc_b hd
on hd.ctinyint = c.ctinyint
) t1
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-2
- Spark
#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: cd
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
- Map 4
- Map Operator Tree:
- TableScan
- alias: hd
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ctinyint (type: tinyint)
- outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: tinyint)
- 1 _col0 (type: tinyint)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ctinyint (type: tinyint), cint (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 30 Data size: 4387 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0
- input vertices:
- 1 Map 3
- Statistics: Num rows: 33 Data size: 4825 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: tinyint)
- 1 _col0 (type: tinyint)
- input vertices:
- 1 Map 4
- Statistics: Num rows: 36 Data size: 5307 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
- Reducer 2
- Execution mode: vectorized
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
PREHOOK: query: -- SORT_QUERY_RESULTS
select count(*) from (select c.ctinyint