You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ek...@apache.org on 2016/05/04 02:12:17 UTC
hive git commit: HIVE-13646 make hive.optimize.sort.dynamic.partition compatible with ACID tables (Eugene Koifman, reviewed by Wei Zheng)
Repository: hive
Updated Branches:
refs/heads/master 70fe31088 -> 872996629
HIVE-13646 make hive.optimize.sort.dynamic.partition compatible with ACID tables (Eugene Koifman, reviewed by Wei Zheng)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87299662
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87299662
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87299662
Branch: refs/heads/master
Commit: 8729966296a041b7ea952ba67f148d2c48c27749
Parents: 70fe310
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Tue May 3 17:11:47 2016 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Tue May 3 17:11:47 2016 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 1 -
.../dynpart_sort_optimization_acid.q.out | 120 +++++++++++++++----
2 files changed, 100 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/87299662/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 06db7f9..2983d38 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7030,7 +7030,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
conf.setBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION, true);
conf.setIntVar(ConfVars.HIVEOPTREDUCEDEDUPLICATIONMINREDUCER, 1);
conf.set(AcidUtils.CONF_ACID_KEY, "true");
- conf.setBoolVar(ConfVars.HIVEOPTSORTDYNAMICPARTITION, false);
if (table.getNumBuckets() < 1) {
throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, table.getTableName());
http://git-wip-us.apache.org/repos/asf/hive/blob/87299662/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
index eca29df..62399e3 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
@@ -380,8 +380,9 @@ POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
@@ -397,12 +398,31 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
value expressions: _col3 (type: string)
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), VALUE._col2 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col2 (type: string)
+ outputColumnNames: _col0, _col3
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col3 (type: string), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +++
+ Map-reduce partition columns: _col3 (type: string)
+ value expressions: 'foo' (type: string), 'bar' (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY.'_bucket_number' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, '_bucket_number'
File Output Operator
compressed: false
table:
@@ -423,7 +443,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acid
- Stage: Stage-2
+ Stage: Stage-3
Stats-Aggr Operator
PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08')
@@ -875,8 +895,9 @@ POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds=
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
@@ -892,12 +913,31 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
value expressions: _col4 (type: int)
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col3 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col3 (type: int)
+ outputColumnNames: _col0, _col4
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: '2008-04-08' (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ sort order: ++++
+ Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int)
+ value expressions: 'foo' (type: string), 'bar' (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
File Output Operator
compressed: false
table:
@@ -919,7 +959,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acid
- Stage: Stage-2
+ Stage: Stage-3
Stats-Aggr Operator
PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11
@@ -1053,8 +1093,9 @@ POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds=
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
@@ -1070,7 +1111,6 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
Reduce Operator Tree:
Select Operator
@@ -1079,6 +1119,26 @@ STAGE PLANS:
File Output Operator
compressed: false
table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ sort order: ++++
+ Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
+ File Output Operator
+ compressed: false
+ table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1097,7 +1157,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acid
- Stage: Stage-2
+ Stage: Stage-3
Stats-Aggr Operator
PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11
@@ -1127,8 +1187,9 @@ POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds=
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
@@ -1144,7 +1205,6 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
Reduce Operator Tree:
Select Operator
@@ -1153,6 +1213,26 @@ STAGE PLANS:
File Output Operator
compressed: false
table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ sort order: ++++
+ Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
+ File Output Operator
+ compressed: false
+ table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1171,7 +1251,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acid
- Stage: Stage-2
+ Stage: Stage-3
Stats-Aggr Operator
PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11