You are viewing a plain-text version of this content; the canonical link is available on the original archive page.
Posted to commits@hive.apache.org by se...@apache.org on 2018/08/23 19:20:57 UTC
[1/3] hive git commit: HIVE-20443 : txn stats cleanup in compaction
txn handler is unneeded (Sergey Shelukhin, reviewed by Eugene Koifman)
Repository: hive
Updated Branches:
refs/heads/master 236a32c64 -> 611770ddf
HIVE-20443 : txn stats cleanup in compaction txn handler is unneeded (Sergey Shelukhin, reviewed by Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5c3ed065
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5c3ed065
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5c3ed065
Branch: refs/heads/master
Commit: 5c3ed065ab2636618548e9461a6e5d74c83c79f8
Parents: 236a32c
Author: sergey <se...@apache.org>
Authored: Thu Aug 23 12:11:37 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Thu Aug 23 12:11:37 2018 -0700
----------------------------------------------------------------------
.../metastore/txn/CompactionTxnHandler.java | 52 +-------------------
1 file changed, 2 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/5c3ed065/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
index 1f559e9..cbb76d5 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
@@ -328,7 +328,7 @@ class CompactionTxnHandler extends TxnHandler {
/**
* This will remove an entry from the queue after
* it has been compacted.
- *
+ *
* @param info info on the compaction entry to remove
*/
@Override
@@ -593,54 +593,6 @@ class CompactionTxnHandler extends TxnHandler {
StringBuilder prefix = new StringBuilder();
StringBuilder suffix = new StringBuilder();
- // Turn off COLUMN_STATS_ACCURATE for txnids' components in TBLS and PARTITIONS
- prefix.append("select tbl_id from TBLS inner join DBS on TBLS.DB_ID = DBS.DB_ID "
- + "inner join TXN_TO_WRITE_ID on t2w_database = DBS.NAME and t2w_table = TBLS.TBL_NAME"
- + " and t2w_writeid = TBLS.WRITE_ID where ");
- suffix.append("");
- TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, txnids, "t2w_txnid", true, false);
-
- // Delete COLUMN_STATS_ACCURATE.BASIC_STATS rows from TABLE_PARAMS for the txnids.
- List<StringBuilder> finalCommands = new ArrayList<>(queries.size());
- for (int i = 0; i < queries.size(); i++) {
- String query = queries.get(i);
- finalCommands.add(i, new StringBuilder("delete from TABLE_PARAMS " +
- " where param_key = '" + "COLUMN_STATS_ACCURATE" + "' and tbl_id in ("));
- finalCommands.get(i).append(query + ")");
- LOG.debug("Going to execute update <" + finalCommands.get(i) + ">");
- int rc = stmt.executeUpdate(finalCommands.get(i).toString());
- LOG.info("Turned off " + rc + " COLUMN_STATE_ACCURATE.BASIC_STATS states from TBLS");
- }
-
- queries.clear();
- prefix.setLength(0);
- suffix.setLength(0);
- finalCommands.clear();
-
- // Delete COLUMN_STATS_ACCURATE.BASIC_STATS rows from PARTITIONS_PARAMS for the txnids.
- prefix.append("select part_id from PARTITIONS "
- + "inner join TBLS on PARTITIONS.TBL_ID = TBLS.TBL_ID "
- + "inner join DBS on TBLS.DB_ID = DBS.DB_ID "
- + "inner join TXN_TO_WRITE_ID on t2w_database = DBS.NAME and t2w_table = TBLS.TBL_NAME"
- + " and t2w_writeid = TBLS.WRITE_ID where ");
- suffix.append("");
- TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, txnids, "t2w_txnid", true, false);
-
- for (int i = 0; i < queries.size(); i++) {
- String query = queries.get(i);
- finalCommands.add(i, new StringBuilder("delete from PARTITION_PARAMS " +
- " where param_key = '" + "COLUMN_STATS_ACCURATE" + "' and part_id in ("));
- finalCommands.get(i).append(query + ")");
- LOG.debug("Going to execute update <" + finalCommands.get(i) + ">");
- int rc = stmt.executeUpdate(finalCommands.get(i).toString());
- LOG.info("Turned off " + rc + " COLUMN_STATE_ACCURATE.BASIC_STATS states from PARTITIONS");
- }
-
- queries.clear();
- prefix.setLength(0);
- suffix.setLength(0);
- finalCommands.clear();
-
// Delete from TXNS.
prefix.append("delete from TXNS where ");
suffix.append("");
@@ -993,7 +945,7 @@ class CompactionTxnHandler extends TxnHandler {
* User initiated compactions don't do this check.
*
* Do we allow compacting whole table (when it's partitioned)? No, though perhaps we should.
- * That would be a meta operations, i.e. first find all partitions for this table (which have
+ * That would be a meta operations, i.e. first find all partitions for this table (which have
* txn info) and schedule each compaction separately. This avoids complications in this logic.
*/
@Override
[2/3] hive git commit: HIVE-20431 : txn stats write ID check triggers
on set location (Sergey Shelukhin, reviewed by Eugene Koifman)
Posted by se...@apache.org.
HIVE-20431 : txn stats write ID check triggers on set location (Sergey Shelukhin, reviewed by Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ec965fa3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ec965fa3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ec965fa3
Branch: refs/heads/master
Commit: ec965fa32dbd7146dba4aec1e12cad30240e8e1b
Parents: 5c3ed06
Author: sergey <se...@apache.org>
Authored: Thu Aug 23 12:12:26 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Thu Aug 23 12:12:26 2018 -0700
----------------------------------------------------------------------
.../hive/ql/parse/DDLSemanticAnalyzer.java | 9 +-
.../hadoop/hive/ql/plan/AlterTableDesc.java | 20 +-
.../test/queries/clientpositive/acid_stats4.q | 34 +-
.../results/clientpositive/acid_stats4.q.out | 381 ++++++++++++++++++-
4 files changed, 431 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ec965fa3/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 2b9f763..273fccb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -1992,7 +1992,10 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
}
addLocationToOutputs(newLocation);
AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, newLocation, partSpec);
-
+ Table tbl = getTable(tableName);
+ if (AcidUtils.isTransactionalTable(tbl)) {
+ setAcidDdlDesc(alterTblDesc);
+ }
addInputsOutputsAlterTable(tableName, partSpec, alterTblDesc);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
alterTblDesc)));
@@ -2277,6 +2280,10 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
alterTblDesc.setOldName(tableName);
alterTblDesc.setIsCascade(isCascade);
alterTblDesc.setPartSpec(partSpec);
+ Table tbl = getTable(tableName);
+ if (AcidUtils.isTransactionalTable(tbl)) {
+ setAcidDdlDesc(alterTblDesc);
+ }
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
alterTblDesc), conf));
http://git-wip-us.apache.org/repos/asf/hive/blob/ec965fa3/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
index 680e029..423ca2a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
@@ -62,11 +62,15 @@ public class AlterTableDesc extends DDLDesc implements Serializable, DDLDesc.DDL
ADDFILEFORMAT("add fileformat"), ADDCLUSTERSORTCOLUMN("add cluster sort column"),
RENAMECOLUMN("rename column"), ADDPARTITION("add partition"), TOUCH("touch"), ARCHIVE("archieve"),
UNARCHIVE("unarchieve"), ALTERLOCATION("alter location"),
- DROPPARTITION("drop partition"), RENAMEPARTITION("rename partition"), ADDSKEWEDBY("add skew column"),
+ DROPPARTITION("drop partition"),
+ RENAMEPARTITION("rename partition"), // Note: used in RenamePartitionDesc, not here.
+ ADDSKEWEDBY("add skew column"),
ALTERSKEWEDLOCATION("alter skew location"), ALTERBUCKETNUM("alter bucket number"),
- ALTERPARTITION("alter partition"), COMPACT("compact"),
+ ALTERPARTITION("alter partition"), // Note: this is never used in AlterTableDesc.
+ COMPACT("compact"),
TRUNCATE("truncate"), MERGEFILES("merge files"), DROPCONSTRAINT("drop constraint"), ADDCONSTRAINT("add constraint"),
- UPDATECOLUMNS("update columns"), OWNER("set owner"), UPDATESTATS("update stats");
+ UPDATECOLUMNS("update columns"), OWNER("set owner"),
+ UPDATESTATS("update stats"); // Note: used in ColumnStatsUpdateWork, not here.
;
private final String name;
@@ -969,10 +973,12 @@ public class AlterTableDesc extends DDLDesc implements Serializable, DDLDesc.DDL
case DROPPROPS: return isExplicitStatsUpdate;
// The check for the following ones is performed before setting AlterTableDesc into the acid field.
// These need write ID and stuff because they invalidate column stats.
- case RENAMECOLUMN: return true;
- case RENAME: return true;
- case REPLACECOLS: return true;
- case ADDCOLS: return true;
+ case RENAMECOLUMN:
+ case RENAME:
+ case REPLACECOLS:
+ case ADDCOLS:
+ case ALTERLOCATION:
+ case UPDATECOLUMNS: return true;
// RENAMEPARTITION is handled in RenamePartitionDesc
default: return false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ec965fa3/ql/src/test/queries/clientpositive/acid_stats4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/acid_stats4.q b/ql/src/test/queries/clientpositive/acid_stats4.q
index 20d1159..9dca3c3 100644
--- a/ql/src/test/queries/clientpositive/acid_stats4.q
+++ b/ql/src/test/queries/clientpositive/acid_stats4.q
@@ -25,17 +25,36 @@ explain select count(key) from stats_nonpart;
ALTER TABLE stats_nonpart CHANGE COLUMN key key2 int;
explain select count(key2) from stats_nonpart;
explain select count(value) from stats_nonpart;
-
analyze table stats_nonpart compute statistics for columns;
explain select count(key2) from stats_nonpart;
alter table stats_nonpart rename to stats_nonpart2;
explain select count(key2) from stats_nonpart2;
-
analyze table stats_nonpart2 compute statistics for columns;
explain select count(key2) from stats_nonpart2;
-drop table stats_nonpart;
+alter table stats_nonpart2 set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
+alter table stats_nonpart2 set serdeproperties ("foo"="bar");
+alter table stats_nonpart2 set fileformat rcfile;
+explain select count(key2) from stats_nonpart2;
+
+alter table stats_nonpart2 set location 'file:${system:test.tmp.dir}/stats_nonpart_zzz';
+explain select count(key2) from stats_nonpart2;
+
+insert overwrite table stats_nonpart2 values (1, "foo");
+explain select count(key2) from stats_nonpart2;
+
+alter table stats_nonpart2 add constraint primary_key primary key (key2) disable novalidate rely;
+alter table stats_nonpart2 drop constraint primary_key;
+explain select count(key2) from stats_nonpart2;
+
+alter table stats_nonpart2 clustered by (key2) INTO 2 BUCKETS;
+explain select count(key2) from stats_nonpart2;
+insert into table stats_nonpart2 values (2, "foo");
+explain select count(key2) from stats_nonpart2;
+
+
+drop table stats_nonpart2;
create table stats_part(key int,value string) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only");
@@ -63,6 +82,15 @@ explain select count(value) from stats_part;
analyze table stats_part partition(p) compute statistics for columns;
explain select count(key2) from stats_part;
+alter table stats_part add partition(p=105);
+explain select count(key2) from stats_part;
+analyze table stats_part partition(p) compute statistics for columns;
+explain select count(key2) from stats_part;
+
+alter table stats_part drop partition(p=104);
+explain select count(key2) from stats_part;
+
+
drop table stats_part;
http://git-wip-us.apache.org/repos/asf/hive/blob/ec965fa3/ql/src/test/results/clientpositive/acid_stats4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_stats4.q.out b/ql/src/test/results/clientpositive/acid_stats4.q.out
index afd5adb..c22bf96 100644
--- a/ql/src/test/results/clientpositive/acid_stats4.q.out
+++ b/ql/src/test/results/clientpositive/acid_stats4.q.out
@@ -172,10 +172,202 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: drop table stats_nonpart
+PREHOOK: query: alter table stats_nonpart2 set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@stats_nonpart2
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: alter table stats_nonpart2 set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@stats_nonpart2
+POSTHOOK: Output: default@stats_nonpart2
+PREHOOK: query: alter table stats_nonpart2 set serdeproperties ("foo"="bar")
+PREHOOK: type: ALTERTABLE_SERDEPROPERTIES
+PREHOOK: Input: default@stats_nonpart2
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: alter table stats_nonpart2 set serdeproperties ("foo"="bar")
+POSTHOOK: type: ALTERTABLE_SERDEPROPERTIES
+POSTHOOK: Input: default@stats_nonpart2
+POSTHOOK: Output: default@stats_nonpart2
+PREHOOK: query: alter table stats_nonpart2 set fileformat rcfile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@stats_nonpart2
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: alter table stats_nonpart2 set fileformat rcfile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@stats_nonpart2
+POSTHOOK: Output: default@stats_nonpart2
+PREHOOK: query: explain select count(key2) from stats_nonpart2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_nonpart2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+#### A masked pattern was here ####
+PREHOOK: type: ALTERTABLE_LOCATION
+PREHOOK: Input: default@stats_nonpart2
+PREHOOK: Output: default@stats_nonpart2
+#### A masked pattern was here ####
+POSTHOOK: type: ALTERTABLE_LOCATION
+POSTHOOK: Input: default@stats_nonpart2
+POSTHOOK: Output: default@stats_nonpart2
+#### A masked pattern was here ####
+PREHOOK: query: explain select count(key2) from stats_nonpart2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_nonpart2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: stats_nonpart2
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key2 (type: int)
+ outputColumnNames: key2
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(key2)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: insert overwrite table stats_nonpart2 values (1, "foo")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: insert overwrite table stats_nonpart2 values (1, "foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@stats_nonpart2
+POSTHOOK: Lineage: stats_nonpart2.key2 SCRIPT []
+POSTHOOK: Lineage: stats_nonpart2.value SCRIPT []
+PREHOOK: query: explain select count(key2) from stats_nonpart2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_nonpart2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: alter table stats_nonpart2 add constraint primary_key primary key (key2) disable novalidate rely
+PREHOOK: type: ALTERTABLE_ADDCONSTRAINT
+POSTHOOK: query: alter table stats_nonpart2 add constraint primary_key primary key (key2) disable novalidate rely
+POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT
+PREHOOK: query: alter table stats_nonpart2 drop constraint primary_key
+PREHOOK: type: ALTERTABLE_DROPCONSTRAINT
+POSTHOOK: query: alter table stats_nonpart2 drop constraint primary_key
+POSTHOOK: type: ALTERTABLE_DROPCONSTRAINT
+PREHOOK: query: explain select count(key2) from stats_nonpart2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_nonpart2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: alter table stats_nonpart2 clustered by (key2) INTO 2 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@stats_nonpart2
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: alter table stats_nonpart2 clustered by (key2) INTO 2 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@stats_nonpart2
+POSTHOOK: Output: default@stats_nonpart2
+PREHOOK: query: explain select count(key2) from stats_nonpart2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_nonpart2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: insert into table stats_nonpart2 values (2, "foo")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: insert into table stats_nonpart2 values (2, "foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@stats_nonpart2
+POSTHOOK: Lineage: stats_nonpart2.key2 SCRIPT []
+POSTHOOK: Lineage: stats_nonpart2.value SCRIPT []
+PREHOOK: query: explain select count(key2) from stats_nonpart2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_nonpart2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: drop table stats_nonpart2
PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table stats_nonpart
+PREHOOK: Input: default@stats_nonpart2
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: drop table stats_nonpart2
POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@stats_nonpart2
+POSTHOOK: Output: default@stats_nonpart2
PREHOOK: query: create table stats_part(key int,value string) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only")
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
@@ -451,6 +643,191 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: alter table stats_part add partition(p=105)
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@stats_part
+POSTHOOK: query: alter table stats_part add partition(p=105)
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@stats_part
+POSTHOOK: Output: default@stats_part@p=105
+PREHOOK: query: explain select count(key2) from stats_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: stats_part
+ Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+ Select Operator
+ expressions: key2 (type: int)
+ outputColumnNames: key2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+ Group By Operator
+ aggregations: count(key2)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: analyze table stats_part partition(p) compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@stats_part
+PREHOOK: Input: default@stats_part@p=101
+PREHOOK: Input: default@stats_part@p=103
+PREHOOK: Input: default@stats_part@p=104
+PREHOOK: Input: default@stats_part@p=105
+PREHOOK: Output: default@stats_part
+PREHOOK: Output: default@stats_part@p=101
+PREHOOK: Output: default@stats_part@p=103
+PREHOOK: Output: default@stats_part@p=104
+PREHOOK: Output: default@stats_part@p=105
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table stats_part partition(p) compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@stats_part
+POSTHOOK: Input: default@stats_part@p=101
+POSTHOOK: Input: default@stats_part@p=103
+POSTHOOK: Input: default@stats_part@p=104
+POSTHOOK: Input: default@stats_part@p=105
+POSTHOOK: Output: default@stats_part
+POSTHOOK: Output: default@stats_part@p=101
+POSTHOOK: Output: default@stats_part@p=103
+POSTHOOK: Output: default@stats_part@p=104
+POSTHOOK: Output: default@stats_part@p=105
+#### A masked pattern was here ####
+PREHOOK: query: explain select count(key2) from stats_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: stats_part
+ Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+ Select Operator
+ expressions: key2 (type: int)
+ outputColumnNames: key2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+ Group By Operator
+ aggregations: count(key2)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: alter table stats_part drop partition(p=104)
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@stats_part
+PREHOOK: Output: default@stats_part@p=104
+POSTHOOK: query: alter table stats_part drop partition(p=104)
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: default@stats_part
+POSTHOOK: Output: default@stats_part@p=104
+PREHOOK: query: explain select count(key2) from stats_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: stats_part
+ Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ expressions: key2 (type: int)
+ outputColumnNames: key2
+ Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(key2)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: drop table stats_part
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@stats_part
[3/3] hive git commit: HIVE-20399 : CTAS w/a custom table location
that is not fully qualified fails for MM tables (Sergey Shelukhin,
reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-20399 : CTAS w/a custom table location that is not fully qualified fails for MM tables (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/611770dd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/611770dd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/611770dd
Branch: refs/heads/master
Commit: 611770ddf9ab7f9c860468dc58b029b4e884beb4
Parents: ec965fa
Author: sergey <se...@apache.org>
Authored: Thu Aug 23 12:20:39 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Thu Aug 23 12:20:39 2018 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/exec/Utilities.java | 13 +++----
.../test/queries/clientpositive/mm_loc_ctas.q | 19 ++++++++++
.../results/clientpositive/mm_loc_ctas.q.out | 38 ++++++++++++++++++++
3 files changed, 64 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/611770dd/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 30bee18..74fb1ba 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -4277,14 +4277,15 @@ public final class Utilities {
}
}
- HashSet<String> committed = new HashSet<>();
+ HashSet<Path> committed = new HashSet<>();
for (Path mfp : manifests) {
try (FSDataInputStream mdis = fs.open(mfp)) {
int fileCount = mdis.readInt();
for (int i = 0; i < fileCount; ++i) {
String nextFile = mdis.readUTF();
Utilities.FILE_OP_LOGGER.trace("Looking at committed file: {}", nextFile);
- if (!committed.add(nextFile)) {
+ Path path = fs.makeQualified(new Path(nextFile));
+ if (!committed.add(path)) {
throw new HiveException(nextFile + " was specified in multiple manifests");
}
}
@@ -4345,7 +4346,7 @@ public final class Utilities {
}
private static void cleanMmDirectory(Path dir, FileSystem fs, String unionSuffix,
- int lbLevels, HashSet<String> committed) throws IOException, HiveException {
+ int lbLevels, HashSet<Path> committed) throws IOException, HiveException {
for (FileStatus child : fs.listStatus(dir)) {
Path childPath = child.getPath();
if (lbLevels > 0) {
@@ -4357,7 +4358,7 @@ public final class Utilities {
"Recursion into LB directory {}; levels remaining ", childPath, lbLevels - 1);
cleanMmDirectory(childPath, fs, unionSuffix, lbLevels - 1, committed);
} else {
- if (committed.contains(childPath.toString())) {
+ if (committed.contains(childPath)) {
throw new HiveException("LB FSOP has commited "
+ childPath + " outside of LB directory levels " + lbLevels);
}
@@ -4367,12 +4368,12 @@ public final class Utilities {
}
// No more LB directories expected.
if (unionSuffix == null) {
- if (committed.remove(childPath.toString())) {
+ if (committed.remove(childPath)) {
continue; // A good file.
}
deleteUncommitedFile(childPath, fs);
} else if (!child.isDirectory()) {
- if (committed.contains(childPath.toString())) {
+ if (committed.contains(childPath)) {
throw new HiveException("Union FSOP has commited "
+ childPath + " outside of union directory " + unionSuffix);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/611770dd/ql/src/test/queries/clientpositive/mm_loc_ctas.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mm_loc_ctas.q b/ql/src/test/queries/clientpositive/mm_loc_ctas.q
new file mode 100644
index 0000000..8e4cbbb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/mm_loc_ctas.q
@@ -0,0 +1,19 @@
+--! qt:dataset:src
+
+set hive.metastore.dml.events=true;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set tez.grouping.min-size=1;
+set tez.grouping.max-size=2;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+
+drop table test;
+create table test(id int, name string);
+insert into test values(1, 'aa'),(2,'bb');
+
+drop table test3;
+CREATE TABLE test3 stored as textfile LOCATION '${system:test.tmp.dir}/test2' tblproperties('transactional'='true', 'transactional_properties'='insert_only') AS SELECT * from test;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/611770dd/ql/src/test/results/clientpositive/mm_loc_ctas.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/mm_loc_ctas.q.out b/ql/src/test/results/clientpositive/mm_loc_ctas.q.out
new file mode 100644
index 0000000..471d835
--- /dev/null
+++ b/ql/src/test/results/clientpositive/mm_loc_ctas.q.out
@@ -0,0 +1,38 @@
+PREHOOK: query: drop table test
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table test
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table test(id int, name string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+POSTHOOK: query: create table test(id int, name string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test
+PREHOOK: query: insert into test values(1, 'aa'),(2,'bb')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test
+POSTHOOK: query: insert into test values(1, 'aa'),(2,'bb')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test
+POSTHOOK: Lineage: test.id SCRIPT []
+POSTHOOK: Lineage: test.name SCRIPT []
+PREHOOK: query: drop table test3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table test3
+POSTHOOK: type: DROPTABLE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test3
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test3