Posted to commits@hive.apache.org by se...@apache.org on 2018/08/23 19:20:57 UTC

[1/3] hive git commit: HIVE-20443 : txn stats cleanup in compaction txn handler is unneeded (Sergey Shelukhin, reviewed by Eugene Koifman)

Repository: hive
Updated Branches:
  refs/heads/master 236a32c64 -> 611770ddf


HIVE-20443 : txn stats cleanup in compaction txn handler is unneeded (Sergey Shelukhin, reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5c3ed065
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5c3ed065
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5c3ed065

Branch: refs/heads/master
Commit: 5c3ed065ab2636618548e9461a6e5d74c83c79f8
Parents: 236a32c
Author: sergey <se...@apache.org>
Authored: Thu Aug 23 12:11:37 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Thu Aug 23 12:11:37 2018 -0700

----------------------------------------------------------------------
 .../metastore/txn/CompactionTxnHandler.java     | 52 +-------------------
 1 file changed, 2 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5c3ed065/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
index 1f559e9..cbb76d5 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
@@ -328,7 +328,7 @@ class CompactionTxnHandler extends TxnHandler {
   /**
    * This will remove an entry from the queue after
    * it has been compacted.
-   * 
+   *
    * @param info info on the compaction entry to remove
    */
   @Override
@@ -593,54 +593,6 @@ class CompactionTxnHandler extends TxnHandler {
         StringBuilder prefix = new StringBuilder();
         StringBuilder suffix = new StringBuilder();
 
-        // Turn off COLUMN_STATS_ACCURATE for txnids' components in TBLS and PARTITIONS
-        prefix.append("select tbl_id from TBLS inner join DBS on TBLS.DB_ID = DBS.DB_ID "
-            + "inner join TXN_TO_WRITE_ID on t2w_database = DBS.NAME and t2w_table = TBLS.TBL_NAME"
-            + " and t2w_writeid = TBLS.WRITE_ID where ");
-        suffix.append("");
-        TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, txnids, "t2w_txnid", true, false);
-
-        // Delete COLUMN_STATS_ACCURATE.BASIC_STATS rows from TABLE_PARAMS for the txnids.
-        List<StringBuilder> finalCommands = new ArrayList<>(queries.size());
-        for (int i = 0; i < queries.size(); i++) {
-          String query = queries.get(i);
-          finalCommands.add(i, new StringBuilder("delete from TABLE_PARAMS " +
-                  " where param_key = '" + "COLUMN_STATS_ACCURATE" + "' and tbl_id in ("));
-          finalCommands.get(i).append(query + ")");
-          LOG.debug("Going to execute update <" + finalCommands.get(i) + ">");
-          int rc = stmt.executeUpdate(finalCommands.get(i).toString());
-          LOG.info("Turned off " + rc + " COLUMN_STATE_ACCURATE.BASIC_STATS states from TBLS");
-        }
-
-        queries.clear();
-        prefix.setLength(0);
-        suffix.setLength(0);
-        finalCommands.clear();
-
-        // Delete COLUMN_STATS_ACCURATE.BASIC_STATS rows from PARTITIONS_PARAMS for the txnids.
-        prefix.append("select part_id from PARTITIONS "
-            + "inner join TBLS on PARTITIONS.TBL_ID = TBLS.TBL_ID "
-            + "inner join DBS on TBLS.DB_ID = DBS.DB_ID "
-            + "inner join TXN_TO_WRITE_ID on t2w_database = DBS.NAME and t2w_table = TBLS.TBL_NAME"
-            + " and t2w_writeid = TBLS.WRITE_ID where ");
-        suffix.append("");
-        TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, txnids, "t2w_txnid", true, false);
-
-        for (int i = 0; i < queries.size(); i++) {
-          String query = queries.get(i);
-          finalCommands.add(i, new StringBuilder("delete from PARTITION_PARAMS " +
-                  " where param_key = '" + "COLUMN_STATS_ACCURATE" + "' and part_id in ("));
-          finalCommands.get(i).append(query + ")");
-          LOG.debug("Going to execute update <" + finalCommands.get(i) + ">");
-          int rc = stmt.executeUpdate(finalCommands.get(i).toString());
-          LOG.info("Turned off " + rc + " COLUMN_STATE_ACCURATE.BASIC_STATS states from PARTITIONS");
-        }
-
-        queries.clear();
-        prefix.setLength(0);
-        suffix.setLength(0);
-        finalCommands.clear();
-
         // Delete from TXNS.
         prefix.append("delete from TXNS where ");
         suffix.append("");
@@ -993,7 +945,7 @@ class CompactionTxnHandler extends TxnHandler {
    * User initiated compactions don't do this check.
    *
    * Do we allow compacting whole table (when it's partitioned)?  No, though perhaps we should.
-   * That would be a meta operations, i.e. first find all partitions for this table (which have 
+   * That would be a meta operations, i.e. first find all partitions for this table (which have
    * txn info) and schedule each compaction separately.  This avoids complications in this logic.
    */
   @Override
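
For context on the deleted block: it built its statements with TxnUtils.buildQueryWithINClause, which splits a potentially huge list of txn ids across several statements so that no single IN clause exceeds the metastore's configured direct-SQL limits. The following is a minimal, self-contained sketch of that batching idea only; the names, signature, and fixed batchSize are illustrative stand-ins, not the real TxnUtils API, which also handles NOT IN, parenthesization, and config-driven sizing.

    import java.util.ArrayList;
    import java.util.List;

    public class InClauseBatcher {
      // Split ids into IN-clause chunks of at most batchSize values each,
      // wrapping every chunk with the caller-supplied prefix and suffix.
      static List<String> buildQueries(String prefix, String suffix,
          List<Long> ids, String column, int batchSize) {
        List<String> queries = new ArrayList<>();
        for (int from = 0; from < ids.size(); from += batchSize) {
          int to = Math.min(from + batchSize, ids.size());
          StringBuilder sb = new StringBuilder(prefix).append(column).append(" in (");
          for (int i = from; i < to; i++) {
            if (i > from) sb.append(',');
            sb.append(ids.get(i));
          }
          queries.add(sb.append(')').append(suffix).toString());
        }
        return queries;
      }

      public static void main(String[] args) {
        // Two ids per statement: three ids split into two DELETEs.
        buildQueries("delete from TXNS where ", "",
            List.of(1L, 2L, 3L), "txn_id", 2).forEach(System.out::println);
      }
    }

With the stats cleanup removed, only the "Delete from TXNS" statements below still use this batching.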


[2/3] hive git commit: HIVE-20431 : txn stats write ID check triggers on set location (Sergey Shelukhin, reviewed by Eugene Koifman)

Posted by se...@apache.org.
HIVE-20431 : txn stats write ID check triggers on set location (Sergey Shelukhin, reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ec965fa3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ec965fa3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ec965fa3

Branch: refs/heads/master
Commit: ec965fa32dbd7146dba4aec1e12cad30240e8e1b
Parents: 5c3ed06
Author: sergey <se...@apache.org>
Authored: Thu Aug 23 12:12:26 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Thu Aug 23 12:12:26 2018 -0700

----------------------------------------------------------------------
 .../hive/ql/parse/DDLSemanticAnalyzer.java      |   9 +-
 .../hadoop/hive/ql/plan/AlterTableDesc.java     |  20 +-
 .../test/queries/clientpositive/acid_stats4.q   |  34 +-
 .../results/clientpositive/acid_stats4.q.out    | 381 ++++++++++++++++++-
 4 files changed, 431 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ec965fa3/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 2b9f763..273fccb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -1992,7 +1992,10 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
     }
     addLocationToOutputs(newLocation);
     AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, newLocation, partSpec);
-
+    Table tbl = getTable(tableName);
+    if (AcidUtils.isTransactionalTable(tbl)) {
+      setAcidDdlDesc(alterTblDesc);
+    }
     addInputsOutputsAlterTable(tableName, partSpec, alterTblDesc);
     rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
         alterTblDesc)));
@@ -2277,6 +2280,10 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
     alterTblDesc.setOldName(tableName);
     alterTblDesc.setIsCascade(isCascade);
     alterTblDesc.setPartSpec(partSpec);
+    Table tbl = getTable(tableName);
+    if (AcidUtils.isTransactionalTable(tbl)) {
+      setAcidDdlDesc(alterTblDesc);
+    }
 
     rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
             alterTblDesc), conf));

http://git-wip-us.apache.org/repos/asf/hive/blob/ec965fa3/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
index 680e029..423ca2a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
@@ -62,11 +62,15 @@ public class AlterTableDesc extends DDLDesc implements Serializable, DDLDesc.DDL
     ADDFILEFORMAT("add fileformat"), ADDCLUSTERSORTCOLUMN("add cluster sort column"),
     RENAMECOLUMN("rename column"), ADDPARTITION("add partition"), TOUCH("touch"), ARCHIVE("archieve"),
     UNARCHIVE("unarchieve"), ALTERLOCATION("alter location"),
-    DROPPARTITION("drop partition"), RENAMEPARTITION("rename partition"), ADDSKEWEDBY("add skew column"),
+    DROPPARTITION("drop partition"),
+    RENAMEPARTITION("rename partition"), // Note: used in RenamePartitionDesc, not here.
+    ADDSKEWEDBY("add skew column"),
     ALTERSKEWEDLOCATION("alter skew location"), ALTERBUCKETNUM("alter bucket number"),
-    ALTERPARTITION("alter partition"), COMPACT("compact"),
+    ALTERPARTITION("alter partition"), // Note: this is never used in AlterTableDesc.
+    COMPACT("compact"),
     TRUNCATE("truncate"), MERGEFILES("merge files"), DROPCONSTRAINT("drop constraint"), ADDCONSTRAINT("add constraint"),
-    UPDATECOLUMNS("update columns"), OWNER("set owner"), UPDATESTATS("update stats");
+    UPDATECOLUMNS("update columns"), OWNER("set owner"),
+    UPDATESTATS("update stats"); // Note: used in ColumnStatsUpdateWork, not here.
     ;
 
     private final String name;
@@ -969,10 +973,12 @@ public class AlterTableDesc extends DDLDesc implements Serializable, DDLDesc.DDL
     case DROPPROPS: return isExplicitStatsUpdate;
     // The check for the following ones is performed before setting AlterTableDesc into the acid field.
     // These need write ID and stuff because they invalidate column stats.
-    case RENAMECOLUMN: return true;
-    case RENAME: return true;
-    case REPLACECOLS: return true;
-    case ADDCOLS: return true;
+    case RENAMECOLUMN:
+    case RENAME:
+    case REPLACECOLS:
+    case ADDCOLS:
+    case ALTERLOCATION:
+    case UPDATECOLUMNS: return true;
     // RENAMEPARTITION is handled in RenamePartitionDesc
     default: return false;
     }
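
The effect of this hunk is that ALTERLOCATION and UPDATECOLUMNS now join the group of operations that request a write ID, since on a transactional table they invalidate existing column stats. A condensed standalone illustration of the fall-through grouping follows; the enum and method names are stand-ins mirroring the hunk above, not the Hive classes themselves.

    public class WriteIdCheckSketch {
      // Stand-in for the alter-table operation enum; not the Hive type.
      enum Op { RENAMECOLUMN, RENAME, REPLACECOLS, ADDCOLS,
                ALTERLOCATION, UPDATECOLUMNS, TOUCH }

      // All stat-invalidating ops fall through to a single "return true".
      static boolean needsWriteId(Op op) {
        switch (op) {
          case RENAMECOLUMN:
          case RENAME:
          case REPLACECOLS:
          case ADDCOLS:
          case ALTERLOCATION:
          case UPDATECOLUMNS:
            return true;
          default:
            return false;
        }
      }

      public static void main(String[] args) {
        System.out.println(needsWriteId(Op.ALTERLOCATION)); // true after this change
        System.out.println(needsWriteId(Op.TOUCH));         // false
      }
    }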

http://git-wip-us.apache.org/repos/asf/hive/blob/ec965fa3/ql/src/test/queries/clientpositive/acid_stats4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/acid_stats4.q b/ql/src/test/queries/clientpositive/acid_stats4.q
index 20d1159..9dca3c3 100644
--- a/ql/src/test/queries/clientpositive/acid_stats4.q
+++ b/ql/src/test/queries/clientpositive/acid_stats4.q
@@ -25,17 +25,36 @@ explain select count(key) from stats_nonpart;
 ALTER TABLE stats_nonpart CHANGE COLUMN key key2 int;
 explain select count(key2) from stats_nonpart;
 explain select count(value) from stats_nonpart;
-
 analyze table stats_nonpart compute statistics for columns;
 explain select count(key2) from stats_nonpart;
 
 alter table stats_nonpart rename to stats_nonpart2;
 explain select count(key2) from stats_nonpart2;
-
 analyze table stats_nonpart2 compute statistics for columns;
 explain select count(key2) from stats_nonpart2;
 
-drop table stats_nonpart;
+alter table stats_nonpart2 set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
+alter table stats_nonpart2 set serdeproperties ("foo"="bar");
+alter table stats_nonpart2 set fileformat rcfile;
+explain select count(key2) from stats_nonpart2;
+
+alter table stats_nonpart2 set location 'file:${system:test.tmp.dir}/stats_nonpart_zzz';
+explain select count(key2) from stats_nonpart2;
+
+insert overwrite table stats_nonpart2 values (1, "foo");
+explain select count(key2) from stats_nonpart2;
+
+alter table stats_nonpart2 add constraint primary_key primary key (key2) disable novalidate rely;
+alter table stats_nonpart2 drop constraint primary_key;
+explain select count(key2) from stats_nonpart2;
+
+alter table stats_nonpart2 clustered by (key2) INTO 2 BUCKETS;
+explain select count(key2) from stats_nonpart2;
+insert into table stats_nonpart2 values (2, "foo");
+explain select count(key2) from stats_nonpart2;
+
+
+drop table stats_nonpart2;
 
 
 create table stats_part(key int,value string) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only");
@@ -63,6 +82,15 @@ explain select count(value) from stats_part;
 analyze table stats_part partition(p) compute statistics for columns;
 explain select count(key2) from stats_part;
 
+alter table stats_part add partition(p=105);
+explain select count(key2) from stats_part;
+analyze table stats_part partition(p) compute statistics for columns;
+explain select count(key2) from stats_part;
+
+alter table stats_part drop partition(p=104);
+explain select count(key2) from stats_part;
+
+
 
 drop table stats_part;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ec965fa3/ql/src/test/results/clientpositive/acid_stats4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_stats4.q.out b/ql/src/test/results/clientpositive/acid_stats4.q.out
index afd5adb..c22bf96 100644
--- a/ql/src/test/results/clientpositive/acid_stats4.q.out
+++ b/ql/src/test/results/clientpositive/acid_stats4.q.out
@@ -172,10 +172,202 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: drop table stats_nonpart
+PREHOOK: query: alter table stats_nonpart2 set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@stats_nonpart2
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: alter table stats_nonpart2 set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@stats_nonpart2
+POSTHOOK: Output: default@stats_nonpart2
+PREHOOK: query: alter table stats_nonpart2 set serdeproperties ("foo"="bar")
+PREHOOK: type: ALTERTABLE_SERDEPROPERTIES
+PREHOOK: Input: default@stats_nonpart2
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: alter table stats_nonpart2 set serdeproperties ("foo"="bar")
+POSTHOOK: type: ALTERTABLE_SERDEPROPERTIES
+POSTHOOK: Input: default@stats_nonpart2
+POSTHOOK: Output: default@stats_nonpart2
+PREHOOK: query: alter table stats_nonpart2 set fileformat rcfile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@stats_nonpart2
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: alter table stats_nonpart2 set fileformat rcfile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@stats_nonpart2
+POSTHOOK: Output: default@stats_nonpart2
+PREHOOK: query: explain select count(key2) from stats_nonpart2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_nonpart2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+#### A masked pattern was here ####
+PREHOOK: type: ALTERTABLE_LOCATION
+PREHOOK: Input: default@stats_nonpart2
+PREHOOK: Output: default@stats_nonpart2
+#### A masked pattern was here ####
+POSTHOOK: type: ALTERTABLE_LOCATION
+POSTHOOK: Input: default@stats_nonpart2
+POSTHOOK: Output: default@stats_nonpart2
+#### A masked pattern was here ####
+PREHOOK: query: explain select count(key2) from stats_nonpart2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_nonpart2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: stats_nonpart2
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key2 (type: int)
+              outputColumnNames: key2
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(key2)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: insert overwrite table stats_nonpart2 values (1, "foo")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: insert overwrite table stats_nonpart2 values (1, "foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@stats_nonpart2
+POSTHOOK: Lineage: stats_nonpart2.key2 SCRIPT []
+POSTHOOK: Lineage: stats_nonpart2.value SCRIPT []
+PREHOOK: query: explain select count(key2) from stats_nonpart2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_nonpart2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: alter table stats_nonpart2 add constraint primary_key primary key (key2) disable novalidate rely
+PREHOOK: type: ALTERTABLE_ADDCONSTRAINT
+POSTHOOK: query: alter table stats_nonpart2 add constraint primary_key primary key (key2) disable novalidate rely
+POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT
+PREHOOK: query: alter table stats_nonpart2 drop constraint primary_key
+PREHOOK: type: ALTERTABLE_DROPCONSTRAINT
+POSTHOOK: query: alter table stats_nonpart2 drop constraint primary_key
+POSTHOOK: type: ALTERTABLE_DROPCONSTRAINT
+PREHOOK: query: explain select count(key2) from stats_nonpart2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_nonpart2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: alter table stats_nonpart2 clustered by (key2) INTO 2 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@stats_nonpart2
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: alter table stats_nonpart2 clustered by (key2) INTO 2 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@stats_nonpart2
+POSTHOOK: Output: default@stats_nonpart2
+PREHOOK: query: explain select count(key2) from stats_nonpart2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_nonpart2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: insert into table stats_nonpart2 values (2, "foo")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: insert into table stats_nonpart2 values (2, "foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@stats_nonpart2
+POSTHOOK: Lineage: stats_nonpart2.key2 SCRIPT []
+POSTHOOK: Lineage: stats_nonpart2.value SCRIPT []
+PREHOOK: query: explain select count(key2) from stats_nonpart2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_nonpart2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table stats_nonpart2
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table stats_nonpart
+PREHOOK: Input: default@stats_nonpart2
+PREHOOK: Output: default@stats_nonpart2
+POSTHOOK: query: drop table stats_nonpart2
 POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@stats_nonpart2
+POSTHOOK: Output: default@stats_nonpart2
 PREHOOK: query: create table stats_part(key int,value string) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only")
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
@@ -451,6 +643,191 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: alter table stats_part add partition(p=105)
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@stats_part
+POSTHOOK: query: alter table stats_part add partition(p=105)
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@stats_part
+POSTHOOK: Output: default@stats_part@p=105
+PREHOOK: query: explain select count(key2) from stats_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: stats_part
+            Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+            Select Operator
+              expressions: key2 (type: int)
+              outputColumnNames: key2
+              Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+              Group By Operator
+                aggregations: count(key2)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: analyze table stats_part partition(p) compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@stats_part
+PREHOOK: Input: default@stats_part@p=101
+PREHOOK: Input: default@stats_part@p=103
+PREHOOK: Input: default@stats_part@p=104
+PREHOOK: Input: default@stats_part@p=105
+PREHOOK: Output: default@stats_part
+PREHOOK: Output: default@stats_part@p=101
+PREHOOK: Output: default@stats_part@p=103
+PREHOOK: Output: default@stats_part@p=104
+PREHOOK: Output: default@stats_part@p=105
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table stats_part partition(p) compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@stats_part
+POSTHOOK: Input: default@stats_part@p=101
+POSTHOOK: Input: default@stats_part@p=103
+POSTHOOK: Input: default@stats_part@p=104
+POSTHOOK: Input: default@stats_part@p=105
+POSTHOOK: Output: default@stats_part
+POSTHOOK: Output: default@stats_part@p=101
+POSTHOOK: Output: default@stats_part@p=103
+POSTHOOK: Output: default@stats_part@p=104
+POSTHOOK: Output: default@stats_part@p=105
+#### A masked pattern was here ####
+PREHOOK: query: explain select count(key2) from stats_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: stats_part
+            Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+            Select Operator
+              expressions: key2 (type: int)
+              outputColumnNames: key2
+              Statistics: Num rows: 3 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+              Group By Operator
+                aggregations: count(key2)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: alter table stats_part drop partition(p=104)
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@stats_part
+PREHOOK: Output: default@stats_part@p=104
+POSTHOOK: query: alter table stats_part drop partition(p=104)
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: default@stats_part
+POSTHOOK: Output: default@stats_part@p=104
+PREHOOK: query: explain select count(key2) from stats_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key2) from stats_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: stats_part
+            Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
+            Select Operator
+              expressions: key2 (type: int)
+              outputColumnNames: key2
+              Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE
+              Group By Operator
+                aggregations: count(key2)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: drop table stats_part
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@stats_part


[3/3] hive git commit: HIVE-20399 : CTAS w/a custom table location that is not fully qualified fails for MM tables (Sergey Shelukhin, reviewed by Ashutosh Chauhan)

Posted by se...@apache.org.
HIVE-20399 : CTAS w/a custom table location that is not fully qualified fails for MM tables (Sergey Shelukhin, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/611770dd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/611770dd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/611770dd

Branch: refs/heads/master
Commit: 611770ddf9ab7f9c860468dc58b029b4e884beb4
Parents: ec965fa
Author: sergey <se...@apache.org>
Authored: Thu Aug 23 12:20:39 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Thu Aug 23 12:20:39 2018 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/exec/Utilities.java   | 13 +++----
 .../test/queries/clientpositive/mm_loc_ctas.q   | 19 ++++++++++
 .../results/clientpositive/mm_loc_ctas.q.out    | 38 ++++++++++++++++++++
 3 files changed, 64 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/611770dd/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 30bee18..74fb1ba 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -4277,14 +4277,15 @@ public final class Utilities {
       }
     }
 
-    HashSet<String> committed = new HashSet<>();
+    HashSet<Path> committed = new HashSet<>();
     for (Path mfp : manifests) {
       try (FSDataInputStream mdis = fs.open(mfp)) {
         int fileCount = mdis.readInt();
         for (int i = 0; i < fileCount; ++i) {
           String nextFile = mdis.readUTF();
           Utilities.FILE_OP_LOGGER.trace("Looking at committed file: {}", nextFile);
-          if (!committed.add(nextFile)) {
+          Path path = fs.makeQualified(new Path(nextFile));
+          if (!committed.add(path)) {
             throw new HiveException(nextFile + " was specified in multiple manifests");
           }
         }
@@ -4345,7 +4346,7 @@ public final class Utilities {
   }
 
   private static void cleanMmDirectory(Path dir, FileSystem fs, String unionSuffix,
-      int lbLevels, HashSet<String> committed) throws IOException, HiveException {
+      int lbLevels, HashSet<Path> committed) throws IOException, HiveException {
     for (FileStatus child : fs.listStatus(dir)) {
       Path childPath = child.getPath();
       if (lbLevels > 0) {
@@ -4357,7 +4358,7 @@ public final class Utilities {
               "Recursion into LB directory {}; levels remaining ", childPath, lbLevels - 1);
           cleanMmDirectory(childPath, fs, unionSuffix, lbLevels - 1, committed);
         } else {
-          if (committed.contains(childPath.toString())) {
+          if (committed.contains(childPath)) {
             throw new HiveException("LB FSOP has commited "
                 + childPath + " outside of LB directory levels " + lbLevels);
           }
@@ -4367,12 +4368,12 @@ public final class Utilities {
       }
       // No more LB directories expected.
       if (unionSuffix == null) {
-        if (committed.remove(childPath.toString())) {
+        if (committed.remove(childPath)) {
           continue; // A good file.
         }
         deleteUncommitedFile(childPath, fs);
       } else if (!child.isDirectory()) {
-        if (committed.contains(childPath.toString())) {
+        if (committed.contains(childPath)) {
           throw new HiveException("Union FSOP has commited "
               + childPath + " outside of union directory " + unionSuffix);
         }
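
The substantive fix here is switching the committed set from HashSet<String> to HashSet<Path> and qualifying each manifest entry with fs.makeQualified. Hadoop Path equality is URI-based, so a path recorded without a scheme never equals the fully qualified paths that listStatus returns, which is why an unqualified CTAS location caused committed files to be treated as uncommitted. A small standalone demonstration of that mismatch follows; it assumes hadoop-common on the classpath, and the local FileSystem and file name are purely illustrative.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class QualifiedPathDemo {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path raw = new Path("/tmp/mm_table/delta_1_1/000000_0");
        Path qualified = fs.makeQualified(raw); // e.g. file:/tmp/mm_table/...
        // Path.equals compares the full URI, so the schemeless form does not
        // match the qualified one, even though both name the same file.
        System.out.println(raw.equals(qualified));                    // false
        System.out.println(qualified.equals(fs.makeQualified(raw)));  // true
      }
    }

Qualifying both the manifest entries and the listed children before comparing makes the HashSet membership tests reliable regardless of how the table location was written.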

http://git-wip-us.apache.org/repos/asf/hive/blob/611770dd/ql/src/test/queries/clientpositive/mm_loc_ctas.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mm_loc_ctas.q b/ql/src/test/queries/clientpositive/mm_loc_ctas.q
new file mode 100644
index 0000000..8e4cbbb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/mm_loc_ctas.q
@@ -0,0 +1,19 @@
+--! qt:dataset:src
+
+set hive.metastore.dml.events=true;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set tez.grouping.min-size=1;
+set tez.grouping.max-size=2;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+
+drop table test;
+create table test(id int, name string);
+insert into test values(1, 'aa'),(2,'bb');
+
+drop table test3;
+CREATE TABLE test3 stored as textfile LOCATION '${system:test.tmp.dir}/test2' tblproperties('transactional'='true', 'transactional_properties'='insert_only') AS SELECT * from test;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/611770dd/ql/src/test/results/clientpositive/mm_loc_ctas.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/mm_loc_ctas.q.out b/ql/src/test/results/clientpositive/mm_loc_ctas.q.out
new file mode 100644
index 0000000..471d835
--- /dev/null
+++ b/ql/src/test/results/clientpositive/mm_loc_ctas.q.out
@@ -0,0 +1,38 @@
+PREHOOK: query: drop table test
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table test
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table test(id int, name string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+POSTHOOK: query: create table test(id int, name string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test
+PREHOOK: query: insert into test values(1, 'aa'),(2,'bb')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test
+POSTHOOK: query: insert into test values(1, 'aa'),(2,'bb')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test
+POSTHOOK: Lineage: test.id SCRIPT []
+POSTHOOK: Lineage: test.name SCRIPT []
+PREHOOK: query: drop table test3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table test3
+POSTHOOK: type: DROPTABLE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test3
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test3