You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2018/10/11 01:11:30 UTC
hive git commit: HIVE-20705: Vectorization: Native Vector MapJoin doesn't support Complex Big Table values (Matt McCline, reviewed by Jason Dere)
Repository: hive
Updated Branches:
refs/heads/master 64bef36a3 -> 37c7fd783
HIVE-20705: Vectorization: Native Vector MapJoin doesn't support Complex Big Table values (Matt McCline, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/37c7fd78
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/37c7fd78
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/37c7fd78
Branch: refs/heads/master
Commit: 37c7fd7833eba087eadd8048dbc63b403b272104
Parents: 64bef36
Author: Jason Dere <jd...@hortonworks.com>
Authored: Wed Oct 10 18:11:02 2018 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Wed Oct 10 18:11:02 2018 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../hive/ql/optimizer/physical/Vectorizer.java | 18 +-
.../apache/hadoop/hive/ql/plan/MapJoinDesc.java | 10 +
.../hadoop/hive/ql/plan/VectorMapJoinDesc.java | 14 +
.../vector_mapjoin_complex_values.q | 34 ++
.../llap/vector_mapjoin_complex_values.q.out | 355 +++++++++++++++++++
6 files changed, 430 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/37c7fd78/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 97609cf..9a87464 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -800,6 +800,7 @@ minillaplocal.query.files=\
vector_like_2.q,\
vector_llap_io_data_conversion.q,\
vector_llap_text_1.q,\
+ vector_mapjoin_complex_values.q,\
vector_mapjoin_reduce.q,\
vector_null_map.q,\
vector_number_compare_projection.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/37c7fd78/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index e93d666..7f48dc5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -3623,6 +3623,9 @@ public class Vectorizer implements PhysicalPlanResolver {
*
* Value expressions include keys? YES.
*/
+ boolean supportsValueTypes = true; // Assume.
+ HashSet<String> notSupportedValueTypes = new HashSet<String>();
+
int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length];
String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length];
TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length];
@@ -3637,7 +3640,13 @@ public class Vectorizer implements PhysicalPlanResolver {
ExprNodeDesc exprNode = bigTableExprs.get(i);
bigTableValueColumnNames[i] = exprNode.toString();
- bigTableValueTypeInfos[i] = exprNode.getTypeInfo();
+ TypeInfo typeInfo = exprNode.getTypeInfo();
+ if (!(typeInfo instanceof PrimitiveTypeInfo)) {
+ supportsValueTypes = false;
+ Category category = typeInfo.getCategory();
+ notSupportedValueTypes.add(category.toString());
+ }
+ bigTableValueTypeInfos[i] = typeInfo;
}
if (bigTableValueExpressionsList.size() == 0) {
slimmedBigTableValueExpressions = null;
@@ -3880,6 +3889,10 @@ public class Vectorizer implements PhysicalPlanResolver {
if (!supportsKeyTypes) {
vectorDesc.setNotSupportedKeyTypes(new ArrayList(notSupportedKeyTypes));
}
+ vectorDesc.setSupportsValueTypes(supportsValueTypes);
+ if (!supportsValueTypes) {
+ vectorDesc.setNotSupportedValueTypes(new ArrayList(notSupportedValueTypes));
+ }
// Check common conditions for both Optimized and Fast Hash Tables.
boolean result = true; // Assume.
@@ -3889,7 +3902,8 @@ public class Vectorizer implements PhysicalPlanResolver {
!oneMapJoinCondition ||
hasNullSafes ||
!smallTableExprVectorizes ||
- outerJoinHasNoKeys) {
+ outerJoinHasNoKeys ||
+ !supportsValueTypes) {
result = false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/37c7fd78/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
index 507114b..093a629 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
@@ -573,6 +573,16 @@ public class MapJoinDesc extends JoinDesc implements Serializable {
vectorMapJoinDesc.getSupportsKeyTypes(),
"Optimized Table and Supports Key Types"));
}
+ final boolean supportsValueTypes = vectorMapJoinDesc.getSupportsValueTypes();
+ if (!supportsValueTypes) {
+
+ // Only add this condition when false to avoid mega-Q file update.
+ conditionList.add(
+ new VectorizationCondition(
+ false,
+ "Supports Value Types " +
+ vectorMapJoinDesc.getNotSupportedValueTypes().toString()));
+ }
VectorizationCondition[] conditions =
conditionList.toArray(new VectorizationCondition[0]);
http://git-wip-us.apache.org/repos/asf/hive/blob/37c7fd78/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java
index 89a07b4..a0ee3a9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java
@@ -204,6 +204,8 @@ public class VectorMapJoinDesc extends AbstractVectorDesc {
private boolean isHybridHashJoin;
private boolean supportsKeyTypes;
private List<String> notSupportedKeyTypes;
+ private boolean supportsValueTypes;
+ private List<String> notSupportedValueTypes;
private boolean smallTableExprVectorizes;
private boolean outerJoinHasNoKeys;
boolean isFullOuter;
@@ -250,6 +252,18 @@ public class VectorMapJoinDesc extends AbstractVectorDesc {
public List<String> getNotSupportedKeyTypes() {
return notSupportedKeyTypes;
}
+ public void setSupportsValueTypes(boolean supportsValueTypes) {
+ this.supportsValueTypes = supportsValueTypes;
+ }
+ public boolean getSupportsValueTypes() {
+ return supportsValueTypes;
+ }
+ public void setNotSupportedValueTypes(List<String> notSupportedValueTypes) {
+ this.notSupportedValueTypes = notSupportedValueTypes;
+ }
+ public List<String> getNotSupportedValueTypes() {
+ return notSupportedValueTypes;
+ }
public void setSmallTableExprVectorizes(boolean smallTableExprVectorizes) {
this.smallTableExprVectorizes = smallTableExprVectorizes;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/37c7fd78/ql/src/test/queries/clientpositive/vector_mapjoin_complex_values.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_mapjoin_complex_values.q b/ql/src/test/queries/clientpositive/vector_mapjoin_complex_values.q
new file mode 100644
index 0000000..1c88daa
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_mapjoin_complex_values.q
@@ -0,0 +1,34 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.vectorized.execution.enabled=true;
+set hive.auto.convert.join=true;
+set hive.mapjoin.hybridgrace.hashtable=false;
+set hive.fetch.task.conversion=none;
+set hive.cli.print.header=true;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+create table census(
+ssn int,
+name string,
+city string,
+email string)
+row format delimited
+fields terminated by ',';
+
+insert into census values(100,"raj","san jose","email");
+
+create table census_clus(
+ssn int,
+name string,
+city string,
+email string)
+clustered by (ssn) into 4 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into table census_clus select * from census;
+
+EXPLAIN VECTORIZATION DETAIL
+UPDATE census_clus SET name = 'updated name' where ssn=100 and EXISTS (select distinct ssn from census where ssn=census_clus.ssn);
+
+UPDATE census_clus SET name = 'updated name' where ssn=100 and EXISTS (select distinct ssn from census where ssn=census_clus.ssn);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/37c7fd78/ql/src/test/results/clientpositive/llap/vector_mapjoin_complex_values.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_mapjoin_complex_values.q.out b/ql/src/test/results/clientpositive/llap/vector_mapjoin_complex_values.q.out
new file mode 100644
index 0000000..36290ae
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_complex_values.q.out
@@ -0,0 +1,355 @@
+PREHOOK: query: create table census(
+ssn int,
+name string,
+city string,
+email string)
+row format delimited
+fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@census
+POSTHOOK: query: create table census(
+ssn int,
+name string,
+city string,
+email string)
+row format delimited
+fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@census
+PREHOOK: query: insert into census values(100,"raj","san jose","email")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@census
+POSTHOOK: query: insert into census values(100,"raj","san jose","email")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@census
+POSTHOOK: Lineage: census.city SCRIPT []
+POSTHOOK: Lineage: census.email SCRIPT []
+POSTHOOK: Lineage: census.name SCRIPT []
+POSTHOOK: Lineage: census.ssn SCRIPT []
+col1 col2 col3 col4
+PREHOOK: query: create table census_clus(
+ssn int,
+name string,
+city string,
+email string)
+clustered by (ssn) into 4 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@census_clus
+POSTHOOK: query: create table census_clus(
+ssn int,
+name string,
+city string,
+email string)
+clustered by (ssn) into 4 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@census_clus
+PREHOOK: query: insert into table census_clus select * from census
+PREHOOK: type: QUERY
+PREHOOK: Input: default@census
+PREHOOK: Output: default@census_clus
+POSTHOOK: query: insert into table census_clus select * from census
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@census
+POSTHOOK: Output: default@census_clus
+POSTHOOK: Lineage: census_clus.city SIMPLE [(census)census.FieldSchema(name:city, type:string, comment:null), ]
+POSTHOOK: Lineage: census_clus.email SIMPLE [(census)census.FieldSchema(name:email, type:string, comment:null), ]
+POSTHOOK: Lineage: census_clus.name SIMPLE [(census)census.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: census_clus.ssn SIMPLE [(census)census.FieldSchema(name:ssn, type:int, comment:null), ]
+census.ssn census.name census.city census.email
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+UPDATE census_clus SET name = 'updated name' where ssn=100 and EXISTS (select distinct ssn from census where ssn=census_clus.ssn)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@census
+PREHOOK: Input: default@census_clus
+PREHOOK: Output: default@census_clus
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+UPDATE census_clus SET name = 'updated name' where ssn=100 and EXISTS (select distinct ssn from census where ssn=census_clus.ssn)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@census
+POSTHOOK: Input: default@census_clus
+POSTHOOK: Output: default@census_clus
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: census_clus
+ filterExpr: (ssn = 100) (type: boolean)
+ Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ssn:int, 1:name:string, 2:city:string, 3:email:string, 4:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColEqualLongScalar(col 0:int, val 100)
+ predicate: (ssn = 100) (type: boolean)
+ Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 100 (type: int)
+ 1 100 (type: int)
+ Map Join Vectorization:
+ bigTableKeyExpressions: ConstantVectorExpression(val 100) -> 5:int
+ bigTableValueExpressions: col 2:string, col 3:string, col 4:struct<writeid:bigint,bucketid:int,rowid:bigint>
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: Supports Value Types [STRUCT] IS false
+ outputColumnNames: _col2, _col3, _col6
+ input vertices:
+ 1 Reducer 4
+ Statistics: Num rows: 1 Data size: 257 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col6 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col2 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col3, _col4
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [2, 0, 1]
+ Statistics: Num rows: 1 Data size: 357 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 2:struct<writeid:bigint,bucketid:int,rowid:bigint>
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: 3:int
+ valueColumns: 0:string, 1:string
+ Statistics: Num rows: 1 Data size: 357 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col3 (type: string), _col4 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ includeColumns: [0, 2, 3]
+ dataColumns: ssn:int, name:string, city:string, email:string
+ neededVirtualColumns: [ROWID]
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: census
+ filterExpr: (ssn = 100) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ssn:int, 1:name:string, 2:city:string, 3:email:string, 4:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColEqualLongScalar(col 0:int, val 100)
+ predicate: (ssn = 100) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: []
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: ConstantVectorExpression(val 100) -> 5:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: 100 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: 100 (type: int)
+ sort order: +
+ Map-reduce partition columns: 100 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 1:int
+ keyExpressions: ConstantVectorExpression(val 100) -> 1:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ includeColumns: [0]
+ dataColumns: ssn:int, name:string, city:string, email:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: z
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:struct<writeid:bigint,bucketid:int,rowid:bigint>, VALUE._col1:string, VALUE._col2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, string]
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 100 (type: int), 'updated name' (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 3, 4, 1, 2]
+ selectExpressions: ConstantVectorExpression(val 100) -> 3:int, ConstantVectorExpression(val updated name) -> 4:string
+ Statistics: Num rows: 1 Data size: 357 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 357 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.census_clus
+ Write Type: UPDATE
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY._col0:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint]
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: ConstantVectorExpression(val 100) -> 1:int, ConstantVectorExpression(val 100) -> 2:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: 100 (type: int), 100 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: []
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: ConstantVectorExpression(val 100) -> 2:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: 100 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: 100 (type: int)
+ sort order: +
+ Map-reduce partition columns: 100 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 1:int
+ keyExpressions: ConstantVectorExpression(val 100) -> 1:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.census_clus
+ Write Type: UPDATE
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: UPDATE census_clus SET name = 'updated name' where ssn=100 and EXISTS (select distinct ssn from census where ssn=census_clus.ssn)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@census
+PREHOOK: Input: default@census_clus
+PREHOOK: Output: default@census_clus
+POSTHOOK: query: UPDATE census_clus SET name = 'updated name' where ssn=100 and EXISTS (select distinct ssn from census where ssn=census_clus.ssn)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@census
+POSTHOOK: Input: default@census_clus
+POSTHOOK: Output: default@census_clus
+row__id ssn _c2 city email