You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ek...@apache.org on 2016/05/19 19:29:25 UTC

[6/6] hive git commit: HIVE-13622 WriteSet tracking optimizations (Eugene Koifman, reviewed by Alan Gates)

HIVE-13622 WriteSet tracking optimizations (Eugene Koifman, reviewed by Alan Gates)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f25b8652
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f25b8652
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f25b8652

Branch: refs/heads/master
Commit: f25b86520d8af5cfad3a0311281bc8e70117baa4
Parents: ffa69a2
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Thu May 19 11:59:45 2016 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Thu May 19 11:59:45 2016 -0700

----------------------------------------------------------------------
 .../hive/hcatalog/streaming/HiveEndPoint.java   |    4 +-
 .../streaming/mutate/client/lock/Lock.java      |    7 +-
 .../streaming/mutate/client/lock/TestLock.java  |   11 +
 .../hive/metastore/TestHiveMetaStoreTxns.java   |    7 +
 metastore/if/hive_metastore.thrift              |   12 +
 .../gen/thrift/gen-cpp/ThriftHiveMetastore.cpp  | 2020 +++++------
 .../gen/thrift/gen-cpp/hive_metastore_types.cpp | 1245 +++----
 .../gen/thrift/gen-cpp/hive_metastore_types.h   |   52 +-
 .../metastore/api/AddDynamicPartitions.java     |  127 +-
 .../hive/metastore/api/DataOperationType.java   |   57 +
 .../hive/metastore/api/LockComponent.java       |  234 +-
 .../src/gen/thrift/gen-php/metastore/Types.php  |   86 +
 .../gen/thrift/gen-py/hive_metastore/ttypes.py  |   69 +-
 .../gen/thrift/gen-rb/hive_metastore_types.rb   |   27 +-
 .../hive/metastore/HiveMetaStoreClient.java     |    9 +
 .../hadoop/hive/metastore/IMetaStoreClient.java |   10 +-
 .../hive/metastore/LockComponentBuilder.java    |   10 +
 .../hive/metastore/LockRequestBuilder.java      |    9 +
 .../hadoop/hive/metastore/txn/TxnHandler.java   |  108 +-
 .../hive/metastore/txn/TxnHandler.java.orig     | 3233 ++++++++++++++++++
 .../metastore/txn/TestCompactionTxnHandler.java |   15 +-
 .../hive/metastore/txn/TestTxnHandler.java      |   85 +
 .../apache/hadoop/hive/ql/exec/MoveTask.java    |    3 +-
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java |   16 +-
 .../hadoop/hive/ql/lockmgr/DbTxnManager.java    |   34 +-
 .../apache/hadoop/hive/ql/metadata/Hive.java    |    6 +-
 .../hive/ql/lockmgr/TestDbTxnManager2.java      |  181 +-
 .../hive/ql/txn/compactor/TestCleaner.java      |    7 +
 .../hive/ql/txn/compactor/TestInitiator.java    |   21 +
 29 files changed, 5992 insertions(+), 1713 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f25b8652/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java
index db9fd72..cb64fff 100644
--- a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java
@@ -19,6 +19,7 @@
 package org.apache.hive.hcatalog.streaming;
 
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hive.metastore.api.DataOperationType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.cli.CliSessionState;
@@ -964,7 +965,8 @@ public class HiveEndPoint {
       LockComponentBuilder lockCompBuilder = new LockComponentBuilder()
               .setDbName(hiveEndPoint.database)
               .setTableName(hiveEndPoint.table)
-              .setShared();
+              .setShared()
+              .setOperationType(DataOperationType.INSERT);
       if (partNameForLock!=null && !partNameForLock.isEmpty() ) {
           lockCompBuilder.setPartitionName(partNameForLock);
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/f25b8652/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java
index 17fa91a..c272837 100644
--- a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.LockComponentBuilder;
 import org.apache.hadoop.hive.metastore.LockRequestBuilder;
+import org.apache.hadoop.hive.metastore.api.DataOperationType;
 import org.apache.hadoop.hive.metastore.api.LockComponent;
 import org.apache.hadoop.hive.metastore.api.LockRequest;
 import org.apache.hadoop.hive.metastore.api.LockResponse;
@@ -178,10 +179,12 @@ public class Lock {
     for (Table table : tables) {
       LockComponentBuilder componentBuilder = new LockComponentBuilder().setDbName(table.getDbName()).setTableName(
           table.getTableName());
+      //todo: DataOperationType is set conservatively here, we'd really want to distinguish update/delete
+      //and insert/select and if resource (that is written to) is ACID or not
       if (sinks.contains(table)) {
-        componentBuilder.setSemiShared();
+        componentBuilder.setSemiShared().setOperationType(DataOperationType.UPDATE).setIsAcid(true);
       } else {
-        componentBuilder.setShared();
+        componentBuilder.setShared().setOperationType(DataOperationType.INSERT).setIsAcid(true);
       }
       LockComponent component = componentBuilder.build();
       requestBuilder.addLockComponent(component);

http://git-wip-us.apache.org/repos/asf/hive/blob/f25b8652/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java
index cf56176..e454942 100644
--- a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java
@@ -43,6 +43,7 @@ import java.util.Timer;
 
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.DataOperationType;
 import org.apache.hadoop.hive.metastore.api.LockComponent;
 import org.apache.hadoop.hive.metastore.api.LockLevel;
 import org.apache.hadoop.hive.metastore.api.LockRequest;
@@ -174,10 +175,14 @@ public class TestLock {
 
     LockComponent expected1 = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "DB");
     expected1.setTablename("SOURCE_1");
+    expected1.setOperationType(DataOperationType.INSERT);
+    expected1.setIsAcid(true);
     assertTrue(components.contains(expected1));
 
     LockComponent expected2 = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "DB");
     expected2.setTablename("SOURCE_2");
+    expected2.setOperationType(DataOperationType.INSERT);
+    expected2.setIsAcid(true);
     assertTrue(components.contains(expected2));
   }
 
@@ -197,14 +202,20 @@ public class TestLock {
 
     LockComponent expected1 = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "DB");
     expected1.setTablename("SOURCE_1");
+    expected1.setOperationType(DataOperationType.INSERT);
+    expected1.setIsAcid(true);
     assertTrue(components.contains(expected1));
 
     LockComponent expected2 = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "DB");
     expected2.setTablename("SOURCE_2");
+    expected2.setOperationType(DataOperationType.INSERT);
+    expected2.setIsAcid(true);
     assertTrue(components.contains(expected2));
 
     LockComponent expected3 = new LockComponent(LockType.SHARED_WRITE, LockLevel.TABLE, "DB");
     expected3.setTablename("SINK");
+    expected3.setOperationType(DataOperationType.UPDATE);
+    expected3.setIsAcid(true);
     assertTrue(components.contains(expected3));
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f25b8652/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreTxns.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreTxns.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreTxns.java
index 22354ab..997f73e 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreTxns.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreTxns.java
@@ -22,6 +22,7 @@ import junit.framework.Assert;
 import org.apache.hadoop.hive.common.ValidTxnList;
 import org.apache.hadoop.hive.common.ValidReadTxnList;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.DataOperationType;
 import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
 import org.apache.hadoop.hive.metastore.api.LockResponse;
 import org.apache.hadoop.hive.metastore.api.LockState;
@@ -152,14 +153,17 @@ public class TestHiveMetaStoreTxns {
         .setTableName("mytable")
         .setPartitionName("mypartition")
         .setExclusive()
+        .setOperationType(DataOperationType.NO_TXN)
         .build());
     rqstBuilder.addLockComponent(new LockComponentBuilder()
         .setDbName("mydb")
         .setTableName("yourtable")
         .setSemiShared()
+        .setOperationType(DataOperationType.NO_TXN)
         .build());
     rqstBuilder.addLockComponent(new LockComponentBuilder()
         .setDbName("yourdb")
+        .setOperationType(DataOperationType.NO_TXN)
         .setShared()
         .build());
     rqstBuilder.setUser("fred");
@@ -188,15 +192,18 @@ public class TestHiveMetaStoreTxns {
         .setTableName("mytable")
         .setPartitionName("mypartition")
         .setSemiShared()
+        .setOperationType(DataOperationType.UPDATE)
         .build())
       .addLockComponent(new LockComponentBuilder()
         .setDbName("mydb")
         .setTableName("yourtable")
         .setSemiShared()
+        .setOperationType(DataOperationType.UPDATE)
         .build())
       .addLockComponent(new LockComponentBuilder()
         .setDbName("yourdb")
         .setShared()
+        .setOperationType(DataOperationType.SELECT)
         .build())
       .setUser("fred");
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f25b8652/metastore/if/hive_metastore.thrift
----------------------------------------------------------------------
diff --git a/metastore/if/hive_metastore.thrift b/metastore/if/hive_metastore.thrift
index f8e56c7..738456c 100755
--- a/metastore/if/hive_metastore.thrift
+++ b/metastore/if/hive_metastore.thrift
@@ -134,6 +134,15 @@ enum GrantRevokeType {
     REVOKE = 2,
 }
 
+enum DataOperationType {
+    SELECT = 1,
+    INSERT = 2,
+    UPDATE = 3,
+    DELETE = 4,
+    UNSET = 5,// the default value, so that "not set" can be told apart from the NULL coming from old clients
+    NO_TXN = 6,// something non-transactional, e.g. drop table, insert overwrite, etc
+}
+
 // Types of events the client can request that the metastore fire.  For now just support DML operations, as the metastore knows
 // about DDL operations and there's no reason for the client to request such an event.
 enum EventRequestType {
@@ -657,6 +666,8 @@ struct LockComponent {
     3: required string dbname,
     4: optional string tablename,
     5: optional string partitionname,
+    6: optional DataOperationType operationType = DataOperationType.UNSET,
+    7: optional bool isAcid = false
 }
 
 struct LockRequest {
@@ -762,6 +773,7 @@ struct AddDynamicPartitions {
     2: required string dbname,
     3: required string tablename,
     4: required list<string> partitionnames,
+    5: optional DataOperationType operationType = DataOperationType.UNSET
 }
 
 struct NotificationEventRequest {