You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/03/03 11:50:56 UTC
[4/7] hive git commit: HIVE-18819: Vectorization: Optimize IF
statement expression evaluation of THEN/ELSE (Matt McCline,
reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out b/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out
index 7402667..4e36f37 100644
--- a/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: create table student_2_lines(
+PREHOOK: query: create table student_10_lines_txt(
name string,
age int,
gpa double)
@@ -7,8 +7,8 @@ fields terminated by '\001'
stored as textfile
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
-PREHOOK: Output: default@student_2_lines
-POSTHOOK: query: create table student_2_lines(
+PREHOOK: Output: default@student_10_lines_txt
+POSTHOOK: query: create table student_10_lines_txt(
name string,
age int,
gpa double)
@@ -17,45 +17,431 @@ fields terminated by '\001'
stored as textfile
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
-POSTHOOK: Output: default@student_2_lines
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines
+POSTHOOK: Output: default@student_10_lines_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_10_lines' OVERWRITE INTO TABLE student_10_lines_txt
PREHOOK: type: LOAD
#### A masked pattern was here ####
-PREHOOK: Output: default@student_2_lines
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines
+PREHOOK: Output: default@student_10_lines_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_10_lines' OVERWRITE INTO TABLE student_10_lines_txt
POSTHOOK: type: LOAD
#### A masked pattern was here ####
-POSTHOOK: Output: default@student_2_lines
-PREHOOK: query: analyze table student_2_lines compute statistics
+POSTHOOK: Output: default@student_10_lines_txt
+PREHOOK: query: CREATE TABLE student_10_lines STORED AS ORC AS SELECT * FROM student_10_lines_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@student_10_lines_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@student_10_lines
+POSTHOOK: query: CREATE TABLE student_10_lines STORED AS ORC AS SELECT * FROM student_10_lines_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@student_10_lines_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@student_10_lines
+POSTHOOK: Lineage: student_10_lines.age SIMPLE [(student_10_lines_txt)student_10_lines_txt.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: student_10_lines.gpa SIMPLE [(student_10_lines_txt)student_10_lines_txt.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: student_10_lines.name SIMPLE [(student_10_lines_txt)student_10_lines_txt.FieldSchema(name:name, type:string, comment:null), ]
+student_10_lines_txt.name student_10_lines_txt.age student_10_lines_txt.gpa
+PREHOOK: query: INSERT INTO TABLE student_10_lines VALUES (NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@student_10_lines
+POSTHOOK: query: INSERT INTO TABLE student_10_lines VALUES (NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@student_10_lines
+POSTHOOK: Lineage: student_10_lines.age EXPRESSION []
+POSTHOOK: Lineage: student_10_lines.gpa EXPRESSION []
+POSTHOOK: Lineage: student_10_lines.name EXPRESSION []
+_col0 _col1 _col2
+PREHOOK: query: INSERT INTO TABLE student_10_lines VALUES ("George", 22, 3.8)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@student_10_lines
+POSTHOOK: query: INSERT INTO TABLE student_10_lines VALUES ("George", 22, 3.8)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@student_10_lines
+POSTHOOK: Lineage: student_10_lines.age SCRIPT []
+POSTHOOK: Lineage: student_10_lines.gpa SCRIPT []
+POSTHOOK: Lineage: student_10_lines.name SCRIPT []
+_col0 _col1 _col2
+PREHOOK: query: analyze table student_10_lines compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@student_10_lines
+POSTHOOK: query: analyze table student_10_lines compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@student_10_lines
+student_10_lines.name student_10_lines.age student_10_lines.gpa
+PREHOOK: query: create table insert_a_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_a_adaptor
+POSTHOOK: query: create table insert_a_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_a_adaptor
+PREHOOK: query: explain vectorization detail
+insert overwrite table insert_a_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+insert overwrite table insert_a_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: student_10_lines
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), age, null) (type: int), if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null) (type: timestamp), if((length(name) > 8), name, null) (type: string), if((length(name) < 8), CAST( name AS BINARY), null) (type: binary), if((age > 40), length(name), null) (type: int), if((length(name) > 10), (2.0D * gpa), null) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_a_adaptor
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: SELECT operator: Unexpected primitive type category VOID
+ vectorized: false
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_a_adaptor
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table insert_a_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@insert_a_adaptor
+POSTHOOK: query: insert overwrite table insert_a_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@insert_a_adaptor
+POSTHOOK: Lineage: insert_a_adaptor.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_a_adaptor.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+name age gpa _c3 _c4 _c5 _c6 _c7 _c8
+PREHOOK: query: select * from insert_a_adaptor
+PREHOOK: type: QUERY
+PREHOOK: Input: default@insert_a_adaptor
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_a_adaptor
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@insert_a_adaptor
+#### A masked pattern was here ####
+insert_a_adaptor.name insert_a_adaptor.age insert_a_adaptor.gpa insert_a_adaptor.a insert_a_adaptor.b insert_a_adaptor.c insert_a_adaptor.d insert_a_adaptor.e insert_a_adaptor.f
+George 22 3.8 22 NULL NULL George NULL NULL
+NULL NULL NULL NULL NULL NULL NULL NULL NULL
+calvin brown 28 2.7 28 NULL calvin brown NULL NULL 5.4
+luke brown 60 1.14 NULL 2011-01-01 01:01:01 luke brown NULL 10 NULL
+luke king 28 0.47 28 NULL luke king NULL NULL NULL
+nick johnson 34 NULL 34 NULL nick johnson NULL NULL NULL
+oscar thompson 35 2.98 35 NULL oscar thompson NULL NULL 5.96
+priscilla falkner 55 1.16 NULL 2011-01-01 01:01:01 priscilla falkner NULL 17 2.32
+quinn ovid 19 NULL 19 NULL quinn ovid NULL NULL NULL
+tom thompson 42 0.53 NULL 2011-01-01 01:01:01 tom thompson NULL 12 1.06
+ulysses garcia 35 2.74 35 NULL ulysses garcia NULL NULL 5.48
+xavier garcia 33 1.06 33 NULL xavier garcia NULL NULL 2.12
+PREHOOK: query: create table insert_a_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_a_good
+POSTHOOK: query: create table insert_a_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_a_good
+PREHOOK: query: explain vectorization detail
+insert overwrite table insert_a_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+insert overwrite table insert_a_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: student_10_lines
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), age, null) (type: int), if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null) (type: timestamp), if((length(name) > 8), name, null) (type: string), if((length(name) < 8), CAST( name AS BINARY), null) (type: binary), if((age > 40), length(name), null) (type: int), if((length(name) > 10), (2.0D * gpa), null) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20]
+ selectExpressions: IfExprColumnNull(col 4:boolean, col 1:int, null)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprColumnNull(col 6:boolean, col 7:timestamp, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprColumnNull(col 10:boolean, col 0:string, null)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprColumnNull(col 12:boolean, col 13:binary, null)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprColumnNull(col 9:boolean, col 15:int, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprColumnNull(col 18:boolean, col 19:double, null)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_a_good
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: name:string, age:int, gpa:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double]
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_a_good
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table insert_a_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@insert_a_good
+POSTHOOK: query: insert overwrite table insert_a_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@insert_a_good
+POSTHOOK: Lineage: insert_a_good.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_a_good.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+name age gpa _c3 _c4 _c5 _c6 _c7 _c8
+PREHOOK: query: select * from insert_a_good
PREHOOK: type: QUERY
-PREHOOK: Input: default@student_2_lines
-PREHOOK: Output: default@student_2_lines
-POSTHOOK: query: analyze table student_2_lines compute statistics
+PREHOOK: Input: default@insert_a_good
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_a_good
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@student_2_lines
-POSTHOOK: Output: default@student_2_lines
-PREHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary)
+POSTHOOK: Input: default@insert_a_good
+#### A masked pattern was here ####
+insert_a_good.name insert_a_good.age insert_a_good.gpa insert_a_good.a insert_a_good.b insert_a_good.c insert_a_good.d insert_a_good.e insert_a_good.f
+George 22 3.8 22 NULL NULL George NULL NULL
+NULL NULL NULL NULL NULL NULL NULL NULL NULL
+calvin brown 28 2.7 28 NULL calvin brown NULL NULL 5.4
+luke brown 60 1.14 NULL 2011-01-01 01:01:01 luke brown NULL 10 NULL
+luke king 28 0.47 28 NULL luke king NULL NULL NULL
+nick johnson 34 NULL 34 NULL nick johnson NULL NULL NULL
+oscar thompson 35 2.98 35 NULL oscar thompson NULL NULL 5.96
+priscilla falkner 55 1.16 NULL 2011-01-01 01:01:01 priscilla falkner NULL 17 2.32
+quinn ovid 19 NULL 19 NULL quinn ovid NULL NULL NULL
+tom thompson 42 0.53 NULL 2011-01-01 01:01:01 tom thompson NULL 12 1.06
+ulysses garcia 35 2.74 35 NULL ulysses garcia NULL NULL 5.48
+xavier garcia 33 1.06 33 NULL xavier garcia NULL NULL 2.12
+PREHOOK: query: create table insert_a_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
-PREHOOK: Output: default@insert_10_1
-POSTHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary)
+PREHOOK: Output: default@insert_a_better
+POSTHOOK: query: create table insert_a_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
-POSTHOOK: Output: default@insert_10_1
+POSTHOOK: Output: default@insert_a_better
PREHOOK: query: explain vectorization detail
-insert overwrite table insert_10_1
- select cast(gpa as float),
- age,
- IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
- IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
+insert overwrite table insert_a_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
PREHOOK: type: QUERY
POSTHOOK: query: explain vectorization detail
-insert overwrite table insert_10_1
- select cast(gpa as float),
- age,
- IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
- IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
+insert overwrite table insert_a_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
POSTHOOK: type: QUERY
+Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
@@ -74,40 +460,39 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: student_2_lines
- Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ alias: student_10_lines
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Select Operator
- expressions: UDFToFloat(gpa) (type: float), age (type: int), if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null) (type: timestamp), if((length(name) > 10), CAST( name AS BINARY), null) (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), age, null) (type: int), if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null) (type: timestamp), if((length(name) > 8), name, null) (type: string), if((length(name) < 8), CAST( name AS BINARY), null) (type: binary), if((age > 40), length(name), null) (type: int), if((length(name) > 10), (2.0D * gpa), null) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [2, 1, 5, 8]
- selectExpressions: VectorUDFAdaptor(if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null))(children: LongColGreaterLongScalar(col 1:int, val 40) -> 4:boolean) -> 5:timestamp, VectorUDFAdaptor(if((length(name) > 10), CAST( name AS BINARY), null))(children: LongColGreaterLongScalar(col 4:int, val 10)(children: StringLength(col 0:string) -> 4:int) -> 6:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 7:binary) -> 8:binary
- Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20]
+ selectExpressions: IfExprColumnNull(col 4:boolean, col 1:int, null)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprColumnNull(col 6:boolean, col 7:timestamp, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprColumnNull(col 10:boolean, col 0:string, null)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprCondExprNull(col 12:boolean, col 13:binary, null)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprCondExprNull(col 9:boolean, col 15:int, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprCondExprNull(col 18:boolean, col 19:double, null)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_10_1
+ name: default.insert_a_better
Execution mode: vectorized, llap
- LLAP IO: no inputs
+ LLAP IO: all inputs
Map Vectorization:
enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
usesVectorUDFAdaptor: true
vectorized: true
@@ -116,7 +501,7 @@ STAGE PLANS:
includeColumns: [0, 1, 2]
dataColumns: name:string, age:int, gpa:double
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, timestamp, bigint, string, string]
+ scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double]
Stage: Stage-2
Dependency Collection
@@ -129,29 +514,578 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.insert_10_1
+ name: default.insert_a_better
Stage: Stage-3
Stats Work
Basic Stats Work:
-PREHOOK: query: insert overwrite table insert_10_1
- select cast(gpa as float),
- age,
- IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
- IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
-PREHOOK: type: QUERY
-PREHOOK: Input: default@student_2_lines
-PREHOOK: Output: default@insert_10_1
-POSTHOOK: query: insert overwrite table insert_10_1
- select cast(gpa as float),
- age,
- IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
- IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@student_2_lines
-POSTHOOK: Output: default@insert_10_1
-POSTHOOK: Lineage: insert_10_1.a EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:gpa, type:double, comment:null), ]
-POSTHOOK: Lineage: insert_10_1.b SIMPLE [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ]
-POSTHOOK: Lineage: insert_10_1.c EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ]
-POSTHOOK: Lineage: insert_10_1.d EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:name, type:string, comment:null), ]
+PREHOOK: query: insert overwrite table insert_a_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@insert_a_better
+POSTHOOK: query: insert overwrite table insert_a_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, age, NULL),
+ IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL),
+ IF(LENGTH(name)>8, name, NULL),
+ IF(LENGTH(name)<8, cast(name as binary), NULL),
+ IF(age>40, LENGTH(name), NULL),
+ IF(LENGTH(name)> 10, 2 * gpa, NULL)
+ from student_10_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@insert_a_better
+POSTHOOK: Lineage: insert_a_better.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_a_better.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+name age gpa _c3 _c4 _c5 _c6 _c7 _c8
+PREHOOK: query: select * from insert_a_better
+PREHOOK: type: QUERY
+PREHOOK: Input: default@insert_a_better
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_a_better
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@insert_a_better
+#### A masked pattern was here ####
+insert_a_better.name insert_a_better.age insert_a_better.gpa insert_a_better.a insert_a_better.b insert_a_better.c insert_a_better.d insert_a_better.e insert_a_better.f
+George 22 3.8 22 NULL NULL George NULL NULL
+NULL NULL NULL NULL NULL NULL NULL NULL NULL
+calvin brown 28 2.7 28 NULL calvin brown NULL NULL 5.4
+luke brown 60 1.14 NULL 2011-01-01 01:01:01 luke brown NULL 10 NULL
+luke king 28 0.47 28 NULL luke king NULL NULL NULL
+nick johnson 34 NULL 34 NULL nick johnson NULL NULL NULL
+oscar thompson 35 2.98 35 NULL oscar thompson NULL NULL 5.96
+priscilla falkner 55 1.16 NULL 2011-01-01 01:01:01 priscilla falkner NULL 17 2.32
+quinn ovid 19 NULL 19 NULL quinn ovid NULL NULL NULL
+tom thompson 42 0.53 NULL 2011-01-01 01:01:01 tom thompson NULL 12 1.06
+ulysses garcia 35 2.74 35 NULL ulysses garcia NULL NULL 5.48
+xavier garcia 33 1.06 33 NULL xavier garcia NULL NULL 2.12
+PREHOOK: query: create table insert_b_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_b_adaptor
+POSTHOOK: query: create table insert_b_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_b_adaptor
+PREHOOK: query: explain vectorization detail
+insert overwrite table insert_b_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+insert overwrite table insert_b_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: student_10_lines
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), null, age) (type: int), if((age > 40), null, TIMESTAMP'2011-01-01 01:01:01.0') (type: timestamp), if((length(name) > 8), null, name) (type: string), if((length(name) < 8), null, CAST( name AS BINARY)) (type: binary), if((age > 40), null, length(name)) (type: int), if((length(name) > 10), null, (2.0D * gpa)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_b_adaptor
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: SELECT operator: Unexpected primitive type category VOID
+ vectorized: false
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_b_adaptor
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table insert_b_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@insert_b_adaptor
+POSTHOOK: query: insert overwrite table insert_b_adaptor
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@insert_b_adaptor
+POSTHOOK: Lineage: insert_b_adaptor.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_b_adaptor.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+name age gpa _c3 _c4 _c5 _c6 _c7 _c8
+PREHOOK: query: select * from insert_b_adaptor
+PREHOOK: type: QUERY
+PREHOOK: Input: default@insert_b_adaptor
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_b_adaptor
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@insert_b_adaptor
+#### A masked pattern was here ####
+insert_b_adaptor.name insert_b_adaptor.age insert_b_adaptor.gpa insert_b_adaptor.a insert_b_adaptor.b insert_b_adaptor.c insert_b_adaptor.d insert_b_adaptor.e insert_b_adaptor.f
+George 22 3.8 NULL 2011-01-01 01:01:01 George NULL 6 7.6
+NULL NULL NULL NULL 2011-01-01 01:01:01 NULL NULL NULL NULL
+calvin brown 28 2.7 NULL 2011-01-01 01:01:01 NULL calvin brown 12 NULL
+luke brown 60 1.14 60 NULL NULL luke brown NULL 2.28
+luke king 28 0.47 NULL 2011-01-01 01:01:01 NULL luke king 9 0.94
+nick johnson 34 NULL NULL 2011-01-01 01:01:01 NULL nick johnson 12 NULL
+oscar thompson 35 2.98 NULL 2011-01-01 01:01:01 NULL oscar thompson 14 NULL
+priscilla falkner 55 1.16 55 NULL NULL priscilla falkner NULL NULL
+quinn ovid 19 NULL NULL 2011-01-01 01:01:01 NULL quinn ovid 10 NULL
+tom thompson 42 0.53 42 NULL NULL tom thompson NULL NULL
+ulysses garcia 35 2.74 NULL 2011-01-01 01:01:01 NULL ulysses garcia 14 NULL
+xavier garcia 33 1.06 NULL 2011-01-01 01:01:01 NULL xavier garcia 13 NULL
+PREHOOK: query: create table insert_b_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_b_good
+POSTHOOK: query: create table insert_b_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_b_good
+PREHOOK: query: explain vectorization detail
+insert overwrite table insert_b_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+insert overwrite table insert_b_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: student_10_lines
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), null, age) (type: int), if((age > 40), null, TIMESTAMP'2011-01-01 01:01:01.0') (type: timestamp), if((length(name) > 8), null, name) (type: string), if((length(name) < 8), null, CAST( name AS BINARY)) (type: binary), if((age > 40), null, length(name)) (type: int), if((length(name) > 10), null, (2.0D * gpa)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20]
+ selectExpressions: IfExprNullColumn(col 4:boolean, null, col 1)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprNullColumn(col 6:boolean, null, col 7)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprNullColumn(col 10:boolean, null, col 0)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprNullColumn(col 12:boolean, null, col 13)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprNullColumn(col 9:boolean, null, col 15)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprNullColumn(col 18:boolean, null, col 19)(
children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_b_good
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: name:string, age:int, gpa:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double]
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_b_good
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table insert_b_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@insert_b_good
+POSTHOOK: query: insert overwrite table insert_b_good
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@insert_b_good
+POSTHOOK: Lineage: insert_b_good.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_b_good.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+name age gpa _c3 _c4 _c5 _c6 _c7 _c8
+PREHOOK: query: select * from insert_b_good
+PREHOOK: type: QUERY
+PREHOOK: Input: default@insert_b_good
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_b_good
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@insert_b_good
+#### A masked pattern was here ####
+insert_b_good.name insert_b_good.age insert_b_good.gpa insert_b_good.a insert_b_good.b insert_b_good.c insert_b_good.d insert_b_good.e insert_b_good.f
+George 22 3.8 NULL 2011-01-01 01:01:01 George NULL 6 7.6
+NULL NULL NULL NULL 2011-01-01 01:01:01 NULL NULL NULL NULL
+calvin brown 28 2.7 NULL 2011-01-01 01:01:01 NULL calvin brown 12 NULL
+luke brown 60 1.14 60 NULL NULL luke brown NULL 2.28
+luke king 28 0.47 NULL 2011-01-01 01:01:01 NULL luke king 9 0.94
+nick johnson 34 NULL NULL 2011-01-01 01:01:01 NULL nick johnson 12 NULL
+oscar thompson 35 2.98 NULL 2011-01-01 01:01:01 NULL oscar thompson 14 NULL
+priscilla falkner 55 1.16 55 NULL NULL priscilla falkner NULL NULL
+quinn ovid 19 NULL NULL 2011-01-01 01:01:01 NULL quinn ovid 10 NULL
+tom thompson 42 0.53 42 NULL NULL tom thompson NULL NULL
+ulysses garcia 35 2.74 NULL 2011-01-01 01:01:01 NULL ulysses garcia 14 NULL
+xavier garcia 33 1.06 NULL 2011-01-01 01:01:01 NULL xavier garcia 13 NULL
+PREHOOK: query: create table insert_b_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_b_better
+POSTHOOK: query: create table insert_b_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_b_better
+PREHOOK: query: explain vectorization detail
+insert overwrite table insert_b_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+insert overwrite table insert_b_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: student_10_lines
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), null, age) (type: int), if((age > 40), null, TIMESTAMP'2011-01-01 01:01:01.0') (type: timestamp), if((length(name) > 8), null, name) (type: string), if((length(name) < 8), null, CAST( name AS BINARY)) (type: binary), if((age > 40), null, length(name)) (type: int), if((length(name) > 10), null, (2.0D * gpa)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20]
+ selectExpressions: IfExprNullColumn(col 4:boolean, null, col 1)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprNullColumn(col 6:boolean, null, col 7)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprNullColumn(col 10:boolean, null, col 0)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprNullCondExpr(col 12:boolean, null, col 13:binary)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprNullCondExpr(col 9:boolean, null, col 15:int)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprNullCondExpr(col 18:boolea
n, null, col 19:double)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_b_better
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: name:string, age:int, gpa:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double]
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_b_better
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table insert_b_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_10_lines
+PREHOOK: Output: default@insert_b_better
+POSTHOOK: query: insert overwrite table insert_b_better
+ select
+ name,
+ age,
+ gpa,
+ IF(age<40, NULL, age),
+ IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)),
+ IF(LENGTH(name)>8, NULL, name),
+ IF(LENGTH(name)<8, NULL, cast(name as binary)),
+ IF(age>40, NULL, LENGTH(name)),
+ IF(LENGTH(name)> 10, NULL, 2 * gpa)
+ from student_10_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_10_lines
+POSTHOOK: Output: default@insert_b_better
+POSTHOOK: Lineage: insert_b_better.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_b_better.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ]
+name age gpa _c3 _c4 _c5 _c6 _c7 _c8
+PREHOOK: query: select * from insert_b_better
+PREHOOK: type: QUERY
+PREHOOK: Input: default@insert_b_better
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_b_better
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@insert_b_better
+#### A masked pattern was here ####
+insert_b_better.name insert_b_better.age insert_b_better.gpa insert_b_better.a insert_b_better.b insert_b_better.c insert_b_better.d insert_b_better.e insert_b_better.f
+George 22 3.8 NULL 2011-01-01 01:01:01 George NULL 6 7.6
+NULL NULL NULL NULL 2011-01-01 01:01:01 NULL NULL NULL NULL
+calvin brown 28 2.7 NULL 2011-01-01 01:01:01 NULL calvin brown 12 NULL
+luke brown 60 1.14 60 NULL NULL luke brown NULL 2.28
+luke king 28 0.47 NULL 2011-01-01 01:01:01 NULL luke king 9 0.94
+nick johnson 34 NULL NULL 2011-01-01 01:01:01 NULL nick johnson 12 NULL
+oscar thompson 35 2.98 NULL 2011-01-01 01:01:01 NULL oscar thompson 14 NULL
+priscilla falkner 55 1.16 55 NULL NULL priscilla falkner NULL NULL
+quinn ovid 19 NULL NULL 2011-01-01 01:01:01 NULL quinn ovid 10 NULL
+tom thompson 42 0.53 42 NULL NULL tom thompson NULL NULL
+ulysses garcia 35 2.74 NULL 2011-01-01 01:01:01 NULL ulysses garcia 14 NULL
+xavier garcia 33 1.06 NULL 2011-01-01 01:01:01 NULL xavier garcia 13 NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out b/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
index 8d3f163..de30ca7 100644
--- a/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
@@ -51,13 +51,13 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 6]
- selectExpressions: IfExprLongScalarLongColumn(col 1:boolean, val 1, col 5:int)(children: IfExprColumnNull(col 3:boolean, col 4:int, null)(children: NotCol(col 1:boolean) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:int) -> 5:int) -> 6:int
+ projectedOutputColumnNums: [0, 7]
+ selectExpressions: IfExprColumnCondExpr(col 1:boolean, col 3:intcol 6:int)(children: col 1:boolean, ConstantVectorExpression(val 1) -> 3:int, IfExprColumnNull(col 4:boolean, col 5:int, null)(children: NotCol(col 1:boolean) -> 4:boolean, ConstantVectorExpression(val 0) -> 5:int) -> 6:int) -> 7:int
Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col1)
Group By Vectorization:
- aggregators: VectorUDAFCount(col 6:int) -> bigint
+ aggregators: VectorUDAFCount(col 7:int) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 0:string
http://git-wip-us.apache.org/repos/asf/hive/blob/53980ba6/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
index 9143816..19d9e39 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
@@ -68,8 +68,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [1, 16, 17]
- selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 16:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 17:string
+ projectedOutputColumnNums: [1, 17, 21]
+ selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string
Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -97,7 +97,7 @@ STAGE PLANS:
includeColumns: [1]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, bigint, string, string, string]
+ scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, string]
Stage: Stage-0
Fetch Operator
@@ -217,8 +217,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [1, 17, 20]
- selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprColumnNull(col 14:boolean, col 15:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean, ConstantVectorExpression(val b) -> 15:string) -> 16:string) -> 17:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 19:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprNullColumn(col 18:boolean, null, col 16)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 18:boolean, ConstantVectorExpression(val c) -> 16:string) -> 19:string) -> 20:string
+ projectedOutputColumnNums: [1, 18, 24]
+ selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string
Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -246,7 +246,7 @@ STAGE PLANS:
includeColumns: [1]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string]
+ scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, bigint, string, string, string]
Stage: Stage-0
Fetch Operator
@@ -594,7 +594,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [6]
- selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
+ selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:decimal(11,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -674,8 +674,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5]
- selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0)
+ projectedOutputColumnNums: [6]
+ selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:decimal(1,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:decimal(1,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -703,7 +703,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: member:decimal(10,0), attr:decimal(10,0)
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)]
+ scratchColumnTypeNames: [bigint, decimal(1,0), decimal(11,0), decimal(11,0)]
Stage: Stage-0
Fetch Operator
@@ -755,8 +755,8 @@ STAGE PLANS:
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5]
- selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0)
+ projectedOutputColumnNums: [6]
+ selectExpressions: IfExprCondExprColumn(col 3:boolean, col 4:decimal(11,0), col 5:decimal(1,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), ConstantVectorExpression(val 2) -> 5:decimal(1,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -784,7 +784,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: member:decimal(10,0), attr:decimal(10,0)
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)]
+ scratchColumnTypeNames: [bigint, decimal(11,0), decimal(1,0), decimal(11,0)]
Stage: Stage-0
Fetch Operator
@@ -855,7 +855,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [6]
- selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint
+ selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:bigintcol 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -936,7 +936,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [5]
- selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint
+ selectExpressions: IfExprNullCondExpr(col 3:boolean, null, col 4:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
@@ -1017,7 +1017,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [5]
- selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint
+ selectExpressions: IfExprCondExprNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false