Posted to commits@kudu.apache.org by to...@apache.org on 2017/11/03 00:27:31 UTC

[1/5] kudu git commit: KUDU-2200: provide better diagnostics when connecting to a subset of masters

Repository: kudu
Updated Branches:
  refs/heads/master 9dd64e1b3 -> b418e88b6


KUDU-2200: provide better diagnostics when connecting to a subset of masters

This changes the ConnectToMaster RPC to send back the list of master
peers, and changes the clients to use this information to provide a
better error message when the user has specified only a subset of the
live masters.

Note that this patch follows a "first do no harm" philosophy: if the
user "gets lucky" and specifies only one of their three masters, and
that master happens to be the leader, we'll continue to treat that as
"successful".

Change-Id: I52f903e1aa5ae6948ca1ba6d4d856c3c9dc73d56
Reviewed-on: http://gerrit.cloudera.org:8080/8393
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/3e86797e
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/3e86797e
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/3e86797e

Branch: refs/heads/master
Commit: 3e86797e6e73365c26b8826083c447494c7fa7eb
Parents: 9dd64e1
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Oct 25 16:36:40 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Thu Nov 2 05:39:01 2017 +0000

----------------------------------------------------------------------
 .../apache/kudu/client/ConnectToCluster.java    | 47 +++++++++++++++++++-
 .../org/apache/kudu/client/ProtobufHelper.java  | 12 +++++
 .../kudu/client/TestConnectToCluster.java       | 47 ++++++++++++++++++++
 src/kudu/client/master_rpc.cc                   | 40 ++++++++++++++---
 .../master_replication-itest.cc                 | 47 ++++++++++++++++++++
 src/kudu/master/master-test.cc                  |  4 ++
 src/kudu/master/master.cc                       | 31 +++++++++++++
 src/kudu/master/master.h                        | 11 ++++-
 src/kudu/master/master.proto                    | 12 +++++
 src/kudu/master/master_service.cc               | 14 ++++++
 10 files changed, 256 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/3e86797e/java/kudu-client/src/main/java/org/apache/kudu/client/ConnectToCluster.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/main/java/org/apache/kudu/client/ConnectToCluster.java b/java/kudu-client/src/main/java/org/apache/kudu/client/ConnectToCluster.java
index 832bbbc..d69bd76 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/ConnectToCluster.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/ConnectToCluster.java
@@ -22,6 +22,7 @@ import java.util.Collections;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Functions;
@@ -34,7 +35,7 @@ import com.stumbleupon.async.Deferred;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
+import org.apache.kudu.Common.HostPortPB;
 import org.apache.kudu.consensus.Metadata.RaftPeerPB.Role;
 import org.apache.kudu.master.Master.ConnectToMasterResponsePB;
 import org.apache.kudu.rpc.RpcHeader.ErrorStatusPB.RpcErrorCodePB;
@@ -64,6 +65,12 @@ final class ConnectToCluster {
       Collections.synchronizedList(new ArrayList<Exception>());
 
   /**
+   * If we've received a response from a master which indicates the full
+   * list of masters in the cluster, it is stored here. Otherwise, null.
+   */
+  private AtomicReference<List<HostPortPB>> knownMasters = new AtomicReference<>();
+
+  /**
    * Creates an object that holds the state needed to retrieve master table's location.
    * @param masterAddrs Addresses of all master replicas that we want to retrieve the
    *                    registration from.
@@ -222,6 +229,34 @@ final class ConnectToCluster {
         Status s = Status.ServiceUnavailable(msg);
         responseD.callback(new NonRecoverableException(s));
       } else {
+        // We couldn't find a leader master. A common case here is that the user only
+        // specified a subset of the masters, so check for that. We could try to do
+        // something fancier like compare the actual host/ports to see if they don't
+        // match, but it's possible that the hostnames used by clients are not the
+        // same as the hostnames that the servers use for each other in some network
+        // setups.
+
+        List<HostPortPB> knownMastersLocal = knownMasters.get();
+        if (knownMastersLocal != null &&
+            knownMastersLocal.size() != numMasters) {
+          String msg = String.format(
+              "Could not connect to a leader master. " +
+              "Client configured with %s master(s) (%s) but cluster indicates it expects " +
+              "%s master(s) (%s)",
+              numMasters, allHosts,
+              knownMastersLocal.size(),
+              ProtobufHelper.hostPortPbListToString(knownMastersLocal));
+          LOG.warn(msg);
+          Exception e = new NonRecoverableException(Status.ConfigurationError(msg));
+          if (!LOG.isDebugEnabled()) {
+            // Stack trace is just internal guts of netty, etc, no need for the detail
+            // level.
+            e.setStackTrace(new StackTraceElement[]{});
+          }
+          responseD.callback(e);
+          return;
+        }
+
         String message = String.format("Master config (%s) has no leader.",
             allHosts);
         Exception ex;
@@ -242,6 +277,15 @@ final class ConnectToCluster {
     }
   }
 
+  private void recordKnownMasters(ConnectToMasterResponsePB r) {
+    // Old versions don't set this field.
+    if (r.getMasterAddrsCount() == 0) {
+      return;
+    }
+
+    knownMasters.compareAndSet(null, r.getMasterAddrsList());
+  }
+
   /**
    * Callback for each ConnectToCluster RPC sent in connectToMaster() above.
    * If a request (paired to a specific master) returns a reply that indicates it's a leader,
@@ -260,6 +304,7 @@ final class ConnectToCluster {
 
     @Override
     public Void call(ConnectToMasterResponsePB r) throws Exception {
+      recordKnownMasters(r);
       if (!r.getRole().equals(Role.LEADER)) {
         incrementCountAndCheckExhausted();
         return null;

http://git-wip-us.apache.org/repos/asf/kudu/blob/3e86797e/java/kudu-client/src/main/java/org/apache/kudu/client/ProtobufHelper.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/main/java/org/apache/kudu/client/ProtobufHelper.java b/java/kudu-client/src/main/java/org/apache/kudu/client/ProtobufHelper.java
index af0963f..c816e3d 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/ProtobufHelper.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/ProtobufHelper.java
@@ -23,6 +23,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import com.google.common.base.Charsets;
+import com.google.common.base.Joiner;
 import com.google.common.collect.ImmutableList;
 import com.google.common.net.HostAndPort;
 import com.google.protobuf.ByteString;
@@ -301,4 +302,15 @@ public class ProtobufHelper {
   public static HostAndPort hostAndPortFromPB(Common.HostPortPB hostPortPB) {
     return HostAndPort.fromParts(hostPortPB.getHost(), hostPortPB.getPort());
   }
+
+  /**
+   * Convert a list of HostPortPBs into a comma-separated string.
+   */
+  public static String hostPortPbListToString(List<Common.HostPortPB> pbs) {
+    List<String> strs = new ArrayList<>(pbs.size());
+    for (Common.HostPortPB pb : pbs) {
+      strs.add(pb.getHost() + ":" + pb.getPort());
+    }
+    return Joiner.on(',').join(strs);
+  }
 }

http://git-wip-us.apache.org/repos/asf/kudu/blob/3e86797e/java/kudu-client/src/test/java/org/apache/kudu/client/TestConnectToCluster.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestConnectToCluster.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestConnectToCluster.java
index 30727ed..d467c8c 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestConnectToCluster.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestConnectToCluster.java
@@ -27,6 +27,8 @@ import java.util.List;
 import com.google.common.collect.ImmutableList;
 import com.google.common.net.HostAndPort;
 import com.stumbleupon.async.Callback;
+
+import org.junit.Assert;
 import org.junit.Test;
 
 import org.apache.kudu.consensus.Metadata;
@@ -67,6 +69,51 @@ public class TestConnectToCluster {
   }
 
   /**
+   * Test for KUDU-2200: if a cluster is running multiple masters, but
+   * the user only specifies one of them in the connection string,
+   * the resulting exception should clarify their error rather than
+   * saying that no leader was found.
+   */
+  @Test(timeout=60000)
+  public void testConnectToOneOfManyMasters() throws Exception {
+    MiniKuduCluster cluster = new MiniKuduCluster.MiniKuduClusterBuilder()
+        .numMasters(3)
+        .numTservers(0)
+        .build();
+    int successes = 0;
+    try {
+      String[] masterAddrs = cluster.getMasterAddresses().split(",");
+      assertEquals(3, masterAddrs.length);
+      for (String masterAddr : masterAddrs) {
+        KuduClient c = null;
+        try {
+          c = new KuduClient.KuduClientBuilder(masterAddr).build();
+          // Call some method which uses the master. This forces us to connect.
+          c.listTabletServers();
+          successes++;
+        } catch (Exception e) {
+          Assert.assertTrue("unexpected exception: " + e.toString(),
+              e.toString().matches(
+                  ".*Client configured with 1 master\\(s\\) " +
+                  "\\(.+?\\) but cluster indicates it expects 3 master\\(s\\) " +
+                  "\\(.+?,.+?,.+?\\).*"));
+        } finally {
+          if (c != null) {
+            c.close();
+          }
+        }
+      }
+    } finally {
+      cluster.shutdown();
+    }
+    // Typically, one of the connections will have succeeded. However, it's possible
+    // that 0 succeeded in the case that the masters were slow at electing
+    // themselves.
+    Assert.assertTrue(successes <= 1);
+  }
+
+
+  /**
    * Unit test which checks that the ConnectToCluster aggregates the
    * responses from the different masters properly and returns the
    * response from the located leader.

http://git-wip-us.apache.org/repos/asf/kudu/blob/3e86797e/src/kudu/client/master_rpc.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/master_rpc.cc b/src/kudu/client/master_rpc.cc
index dcdc9a5..a03f5f7 100644
--- a/src/kudu/client/master_rpc.cc
+++ b/src/kudu/client/master_rpc.cc
@@ -27,6 +27,7 @@
 #include <boost/bind.hpp>
 #include <glog/logging.h>
 
+#include "kudu/common/common.pb.h"
 #include "kudu/common/wire_protocol.h"
 #include "kudu/consensus/metadata.pb.h"
 #include "kudu/gutil/basictypes.h"
@@ -318,13 +319,38 @@ void ConnectToClusterRpc::SingleNodeCallback(int master_idx,
     }
     if (new_status.ok()) {
       if (resp.role() != RaftPeerPB::LEADER) {
-        // Use a Status::NotFound() to indicate that the node is not
-        // the leader: this way, we can handle the case where we've
-        // received a reply from all of the nodes in the cluster (no
-        // network or other errors encountered), but haven't found a
-        // leader (which means that SendRpcCb() above can perform a
-        // delayed retry).
-        new_status = Status::NotFound("no leader found: " + ToString());
+        string msg;
+        if (resp.master_addrs_size() > 0 &&
+            resp.master_addrs_size() != addrs_with_names_.size()) {
+          // If we connected to a non-leader, and it reports that the
+          // number of masters in the cluster doesn't match the client's
+          // view of the number of masters, then it's likely the client
+          // is misconfigured (i.e. with a subset of the masters).
+          // We'll include that info in the error message.
+          string client_config = JoinMapped(
+              addrs_with_names_,
+              [](const pair<Sockaddr, string>& addr_with_name) {
+                return Substitute("$0:$1", addr_with_name.second, addr_with_name.first.port());
+              }, ",");
+          string cluster_config = JoinMapped(
+              resp.master_addrs(),
+              [](const HostPortPB& pb) {
+                return Substitute("$0:$1", pb.host(), pb.port());
+              }, ",");
+          new_status = Status::ConfigurationError(Substitute(
+              "no leader master found. Client configured with $0 master(s) ($1) "
+              "but cluster indicates it expects $2 master(s) ($3)",
+              addrs_with_names_.size(), client_config,
+              resp.master_addrs_size(), cluster_config));
+        } else {
+          // Use a Status::NotFound() to indicate that the node is not
+          // the leader: this way, we can handle the case where we've
+          // received a reply from all of the nodes in the cluster (no
+          // network or other errors encountered), but haven't found a
+          // leader (which means that SendRpcCb() above can perform a
+          // delayed retry).
+          new_status = Status::NotFound("no leader found", ToString());
+        }
       } else {
         // We've found a leader.
         leader_idx_ = master_idx;

http://git-wip-us.apache.org/repos/asf/kudu/blob/3e86797e/src/kudu/integration-tests/master_replication-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_replication-itest.cc b/src/kudu/integration-tests/master_replication-itest.cc
index 80728ad..f75bca5 100644
--- a/src/kudu/integration-tests/master_replication-itest.cc
+++ b/src/kudu/integration-tests/master_replication-itest.cc
@@ -32,6 +32,7 @@
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/ref_counted.h"
+#include "kudu/gutil/strings/substitute.h"
 #include "kudu/master/catalog_manager.h"
 #include "kudu/master/master.h"
 #include "kudu/master/master.pb.h"
@@ -52,6 +53,8 @@
 using std::string;
 using std::vector;
 
+using strings::Substitute;
+
 namespace kudu {
 namespace master {
 
@@ -288,5 +291,49 @@ TEST_F(MasterReplicationTest, TestMasterPeerSetsDontMatch) {
   ASSERT_STR_CONTAINS(s.ToString(), "55555");
 }
 
+TEST_F(MasterReplicationTest, TestConnectToClusterReturnsAddresses) {
+  for (int i = 0; i < cluster_->num_masters(); i++) {
+    SCOPED_TRACE(Substitute("Connecting to master $0", i));
+    auto proxy = cluster_->master_proxy(i);
+    rpc::RpcController rpc;
+    ConnectToMasterRequestPB req;
+    ConnectToMasterResponsePB resp;
+    ASSERT_OK(proxy->ConnectToMaster(req, &resp, &rpc));
+    ASSERT_EQ(cluster_->num_masters(), resp.master_addrs_size());
+    for (int j = 0; j < cluster_->num_masters(); j++) {
+      const auto& addr = resp.master_addrs(j);
+      ASSERT_EQ(cluster_->mini_master(j)->bound_rpc_addr().ToString(),
+                Substitute("$0:$1", addr.host(), addr.port()));
+    }
+  }
+}
+
+
+// Test for KUDU-2200: if a user specifies just one of the masters, and that master is a
+// follower, we should give a status message that explains their mistake.
+TEST_F(MasterReplicationTest, TestConnectToFollowerMasterOnly) {
+  int successes = 0;
+  for (int i = 0; i < cluster_->num_masters(); i++) {
+    SCOPED_TRACE(Substitute("Connecting to master $0", i));
+
+    shared_ptr<KuduClient> client;
+    KuduClientBuilder builder;
+    builder.add_master_server_addr(cluster_->mini_master(i)->bound_rpc_addr_str());
+    Status s = builder.Build(&client);
+    if (s.ok()) {
+      successes++;
+    } else {
+      ASSERT_STR_MATCHES(s.ToString(),
+                         R"(Configuration error: .*Client configured with 1 master\(s\) \(.+\) )"
+                         R"(but cluster indicates it expects 3.*)");
+    }
+  }
+  // It's possible that we get either 0 or 1 success in the above loop:
+  // - 0, in the case that no master had elected itself yet
+  // - 1, in the case that one master had become leader by the time we connected.
+  EXPECT_LE(successes, 1);
+}
+
+
 } // namespace master
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/3e86797e/src/kudu/master/master-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index 30a914d..de0af5c 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -1424,6 +1424,10 @@ TEST_F(MasterTest, TestConnectToMaster) {
   security::TokenPB token;
   ASSERT_TRUE(token.ParseFromString(resp.authn_token().token_data()));
   ASSERT_TRUE(token.authn().has_username());
+
+  ASSERT_EQ(1, resp.master_addrs_size());
+  ASSERT_EQ("127.0.0.1", resp.master_addrs(0).host());
+  ASSERT_NE(0, resp.master_addrs(0).port());
 }
 
 // Test that the master signs its on server certificate when it becomes the leader,

http://git-wip-us.apache.org/repos/asf/kudu/blob/3e86797e/src/kudu/master/master.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master.cc b/src/kudu/master/master.cc
index 0bb45cf..5aff500 100644
--- a/src/kudu/master/master.cc
+++ b/src/kudu/master/master.cc
@@ -30,14 +30,17 @@
 #include <glog/logging.h>
 
 #include "kudu/cfile/block_cache.h"
+#include "kudu/common/common.pb.h"
 #include "kudu/common/wire_protocol.h"
 #include "kudu/common/wire_protocol.pb.h"
 #include "kudu/consensus/metadata.pb.h"
+#include "kudu/consensus/raft_consensus.h"
 #include "kudu/fs/fs_manager.h"
 #include "kudu/gutil/bind.h"
 #include "kudu/gutil/bind_helpers.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/move.h"
+#include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/master/catalog_manager.h"
 #include "kudu/master/master.pb.h"
@@ -45,6 +48,7 @@
 #include "kudu/master/master_cert_authority.h"
 #include "kudu/master/master_path_handlers.h"
 #include "kudu/master/master_service.h"
+#include "kudu/master/sys_catalog.h"
 #include "kudu/master/ts_manager.h"
 #include "kudu/rpc/messenger.h"
 #include "kudu/rpc/rpc_controller.h"
@@ -52,6 +56,7 @@
 #include "kudu/security/token_signer.h"
 #include "kudu/server/rpc_server.h"
 #include "kudu/server/webserver.h"
+#include "kudu/tablet/tablet_replica.h"
 #include "kudu/tserver/tablet_copy_service.h"
 #include "kudu/tserver/tablet_service.h"
 #include "kudu/util/flag_tags.h"
@@ -330,5 +335,31 @@ Status Master::ListMasters(std::vector<ServerEntryPB>* masters) const {
   return Status::OK();
 }
 
+Status Master::GetMasterHostPorts(std::vector<HostPortPB>* hostports) const {
+  auto consensus = catalog_manager_->sys_catalog()->tablet_replica()->shared_consensus();
+  if (!consensus) {
+    return Status::IllegalState("consensus not running");
+  }
+
+  hostports->clear();
+  consensus::RaftConfigPB config = consensus->CommittedConfig();
+  for (auto& peer : *config.mutable_peers()) {
+    if (peer.member_type() == consensus::RaftPeerPB::VOTER) {
+      // In non-distributed master configurations, we don't store our own
+      // last known address in the Raft config. So, we'll fill it in from
+      // the server Registration instead.
+      if (!peer.has_last_known_addr()) {
+        DCHECK_EQ(config.peers_size(), 1);
+        DCHECK(registration_initialized_.load());
+        hostports->emplace_back(registration_.rpc_addresses(0));
+      } else {
+        hostports->emplace_back(std::move(*peer.mutable_last_known_addr()));
+      }
+    }
+  }
+  return Status::OK();
+}
+
+
 } // namespace master
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/3e86797e/src/kudu/master/master.h
----------------------------------------------------------------------
diff --git a/src/kudu/master/master.h b/src/kudu/master/master.h
index 63a29a4..fda75a1 100644
--- a/src/kudu/master/master.h
+++ b/src/kudu/master/master.h
@@ -34,6 +34,7 @@
 
 namespace kudu {
 
+class HostPortPB;
 class MaintenanceManager;
 class MonoDelta;
 class ThreadPool;
@@ -88,12 +89,20 @@ class Master : public kserver::KuduServer {
   // Get node instance, Raft role, RPC and HTTP addresses for all
   // masters.
   //
-  // TODO move this to a separate class to be re-used in TS and
+  // NOTE: this performs a round-trip RPC to all of the masters so
+  // should not be used in any performance-critical paths.
+  //
+  // TODO(todd) move this to a separate class to be re-used in TS and
   // client; cache this information with a TTL (possibly in another
   // SysTable), so that we don't have to perform an RPC call on every
   // request.
   Status ListMasters(std::vector<ServerEntryPB>* masters) const;
 
+  // Gets the HostPorts for all of the masters in the cluster.
+  // This is not as complete as ListMasters() above, but operates just
+  // based on local state.
+  Status GetMasterHostPorts(std::vector<HostPortPB>* hostports) const;
+
   bool IsShutdown() const {
     return state_ == kStopped;
   }

http://git-wip-us.apache.org/repos/asf/kudu/blob/3e86797e/src/kudu/master/master.proto
----------------------------------------------------------------------
diff --git a/src/kudu/master/master.proto b/src/kudu/master/master.proto
index 68891cb..4ec6dd1 100644
--- a/src/kudu/master/master.proto
+++ b/src/kudu/master/master.proto
@@ -609,6 +609,18 @@ message ConnectToMasterResponsePB {
   // If the client requested an authentication token, and security is
   // enabled on the cluster, the master returns a signed authn token.
   optional security.SignedTokenPB authn_token = 4;
+
+  // The hosts and ports of the masters in this cluster.
+  //
+  // NOTE: Added in Kudu 1.6.
+  //
+  // NOTE: it is likely, but not guaranteed, that the hostnames advertised here
+  // will be usable by all clients. Client implementations should not use this
+  // field for "discovery" of other masters, since then it's likely that
+  // users will configure applications to only talk to one of the masters in
+  // an HA setup. If that master then fails, the applications would go
+  // offline.
+  repeated HostPortPB master_addrs = 5;
 }
 
 // ============================================================================

http://git-wip-us.apache.org/repos/asf/kudu/blob/3e86797e/src/kudu/master/master_service.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master_service.cc b/src/kudu/master/master_service.cc
index 190e994..0a8e82a 100644
--- a/src/kudu/master/master_service.cc
+++ b/src/kudu/master/master_service.cc
@@ -20,11 +20,13 @@
 #include <memory>
 #include <ostream>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include <gflags/gflags.h>
 #include <glog/logging.h>
 
+#include "kudu/common/common.pb.h"
 #include "kudu/common/wire_protocol.h"
 #include "kudu/common/wire_protocol.pb.h"
 #include "kudu/gutil/strings/substitute.h"
@@ -418,6 +420,18 @@ void MasterServiceImpl::ConnectToMaster(const ConnectToMasterRequestPB* /*req*/,
   auto role = server_->catalog_manager()->Role();
   resp->set_role(role);
 
+  // Set the info about the other masters, so that the client can verify
+  // it has the full set of info.
+  {
+    vector<HostPortPB> hostports;
+    WARN_NOT_OK(server_->GetMasterHostPorts(&hostports),
+                "unable to get HostPorts for masters");
+    resp->mutable_master_addrs()->Reserve(hostports.size());
+    for (auto& hp : hostports) {
+      *resp->add_master_addrs() = std::move(hp);
+    }
+  }
+
   if (l.leader_status().ok()) {
     // TODO(KUDU-1924): it seems there is some window when 'role' is LEADER but
     // in fact we aren't done initializing (and we don't have a CA cert).


[3/5] kudu git commit: KUDU-2191 (2/n): Hive Metastore client

Posted by to...@apache.org.
http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/src/kudu/hms/hms_client-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client-test.cc b/src/kudu/hms/hms_client-test.cc
new file mode 100644
index 0000000..72214a9
--- /dev/null
+++ b/src/kudu/hms/hms_client-test.cc
@@ -0,0 +1,208 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/hms/hms_client.h"
+
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <glog/stl_logging.h> // IWYU pragma: keep
+#include <gtest/gtest.h>
+
+#include "kudu/hms/hive_metastore_constants.h"
+#include "kudu/hms/hive_metastore_types.h"
+#include "kudu/hms/mini_hms.h"
+#include "kudu/util/status.h"
+#include "kudu/util/test_macros.h"
+#include "kudu/util/test_util.h"
+
+using std::make_pair;
+using std::string;
+using std::vector;
+
+namespace kudu {
+namespace hms {
+
+class HmsClientTest : public KuduTest {
+ public:
+
+  Status CreateTable(HmsClient* client,
+                     const string& database_name,
+                     const string& table_name,
+                     const string& table_id) {
+    hive::Table table;
+    table.dbName = database_name;
+    table.tableName = table_name;
+    table.tableType = "MANAGED_TABLE";
+
+    table.__set_parameters({
+        make_pair(HmsClient::kKuduTableIdKey, table_id),
+        make_pair(HmsClient::kKuduMasterAddrsKey, string("TODO")),
+        make_pair(hive::g_hive_metastore_constants.META_TABLE_STORAGE,
+                  HmsClient::kKuduStorageHandler),
+    });
+
+    return client->CreateTable(table);
+  }
+
+  Status DropTable(HmsClient* client,
+                   const string& database_name,
+                   const string& table_name,
+                   const string& table_id) {
+    hive::EnvironmentContext env_ctx;
+    env_ctx.__set_properties({ make_pair(HmsClient::kKuduTableIdKey, table_id) });
+    return client->DropTableWithContext(database_name, table_name, env_ctx);
+  }
+};
+
+TEST_F(HmsClientTest, TestHmsOperations) {
+  MiniHms hms;
+  ASSERT_OK(hms.Start());
+
+  HmsClient client(hms.address());
+  ASSERT_OK(client.Start());
+
+  // Create a database.
+  string database_name = "my_db";
+  hive::Database db;
+  db.name = database_name;
+  ASSERT_OK(client.CreateDatabase(db));
+  ASSERT_TRUE(client.CreateDatabase(db).IsAlreadyPresent());
+
+  // Get all databases.
+  vector<string> databases;
+  ASSERT_OK(client.GetAllDatabases(&databases));
+  std::sort(databases.begin(), databases.end());
+  EXPECT_EQ(vector<string>({ "default", database_name }), databases) << "Databases: " << databases;
+
+  // Get a specific database.
+  hive::Database my_db;
+  ASSERT_OK(client.GetDatabase(database_name, &my_db));
+  EXPECT_EQ(database_name, my_db.name) << "my_db: " << my_db;
+
+  string table_name = "my_table";
+  string table_id = "table-id";
+
+  // Check that the HMS rejects Kudu tables without a table ID.
+  ASSERT_STR_CONTAINS(CreateTable(&client, database_name, table_name, "").ToString(),
+                      "Kudu table entry must contain a table ID");
+
+  // Create a table.
+  ASSERT_OK(CreateTable(&client, database_name, table_name, table_id));
+  ASSERT_TRUE(CreateTable(&client, database_name, table_name, table_id).IsAlreadyPresent());
+
+  // Retrieve a table.
+  hive::Table my_table;
+  ASSERT_OK(client.GetTable(database_name, table_name, &my_table));
+  EXPECT_EQ(database_name, my_table.dbName) << "my_table: " << my_table;
+  EXPECT_EQ(table_name, my_table.tableName);
+  EXPECT_EQ(table_id, my_table.parameters[HmsClient::kKuduTableIdKey]);
+  EXPECT_EQ(HmsClient::kKuduStorageHandler,
+            my_table.parameters[hive::g_hive_metastore_constants.META_TABLE_STORAGE]);
+  EXPECT_EQ("MANAGED_TABLE", my_table.tableType);
+
+  string new_table_name = "my_altered_table";
+
+  // Renaming the table with an incorrect table ID should fail.
+  hive::Table altered_table(my_table);
+  altered_table.tableName = new_table_name;
+  altered_table.parameters[HmsClient::kKuduTableIdKey] = "bogus-table-id";
+  ASSERT_TRUE(client.AlterTable(database_name, table_name, altered_table).IsRuntimeError());
+
+  // Rename the table.
+  altered_table.parameters[HmsClient::kKuduTableIdKey] = table_id;
+  ASSERT_OK(client.AlterTable(database_name, table_name, altered_table));
+
+  // Original table is gone.
+  ASSERT_TRUE(client.AlterTable(database_name, table_name, altered_table).IsIllegalState());
+
+  // Check that the altered table's properties are intact.
+  hive::Table renamed_table;
+  ASSERT_OK(client.GetTable(database_name, new_table_name, &renamed_table));
+  EXPECT_EQ(database_name, renamed_table.dbName);
+  EXPECT_EQ(new_table_name, renamed_table.tableName);
+  EXPECT_EQ(table_id, renamed_table.parameters[HmsClient::kKuduTableIdKey]);
+  EXPECT_EQ(HmsClient::kKuduStorageHandler,
+            renamed_table.parameters[hive::g_hive_metastore_constants.META_TABLE_STORAGE]);
+  EXPECT_EQ("MANAGED_TABLE", renamed_table.tableType);
+
+  // Create a table with an uppercase name.
+  string uppercase_table_name = "my_UPPERCASE_Table";
+  ASSERT_OK(CreateTable(&client, database_name, uppercase_table_name, "uppercase-table-id"));
+
+  // Create a table with an illegal utf-8 name.
+  ASSERT_TRUE(CreateTable(&client, database_name, "☃ sculptures 😉", table_id).IsInvalidArgument());
+
+  // Get all tables.
+  vector<string> tables;
+  ASSERT_OK(client.GetAllTables(database_name, &tables));
+  std::sort(tables.begin(), tables.end());
+  EXPECT_EQ(vector<string>({ new_table_name, "my_uppercase_table" }), tables)
+      << "Tables: " << tables;
+
+  // Check that the HMS rejects Kudu table drops with a bogus table ID.
+  ASSERT_TRUE(DropTable(&client, database_name, new_table_name, "bogus-table-id").IsRuntimeError());
+  // Check that the HMS rejects non-existent table drops.
+  ASSERT_TRUE(DropTable(&client, database_name, "foo-bar", "bogus-table-id").IsNotFound());
+
+  // Drop a table.
+  ASSERT_OK(DropTable(&client, database_name, new_table_name, table_id));
+
+  // Drop the database.
+  ASSERT_TRUE(client.DropDatabase(database_name).IsIllegalState());
+  // TODO(HIVE-17008)
+  // ASSERT_OK(client.DropDatabase(database_name, Cascade::kTrue));
+  // TODO(HIVE-17008)
+  // ASSERT_TRUE(client.DropDatabase(database_name).IsNotFound());
+
+  int64_t event_id;
+  ASSERT_OK(client.GetCurrentNotificationEventId(&event_id));
+
+  // Retrieve the notification log and spot-check that the results look sensible.
+  vector<hive::NotificationEvent> events;
+  ASSERT_OK(client.GetNotificationEvents(-1, 100, &events));
+
+  ASSERT_EQ(5, events.size());
+  EXPECT_EQ("CREATE_DATABASE", events[0].eventType);
+  EXPECT_EQ("CREATE_TABLE", events[1].eventType);
+  EXPECT_EQ("ALTER_TABLE", events[2].eventType);
+  EXPECT_EQ("CREATE_TABLE", events[3].eventType);
+  EXPECT_EQ("DROP_TABLE", events[4].eventType);
+  // TODO(HIVE-17008)
+  //EXPECT_EQ("DROP_TABLE", events[5].eventType);
+  //EXPECT_EQ("DROP_DATABASE", events[6].eventType);
+
+  // Retrieve a specific notification log.
+  events.clear();
+  ASSERT_OK(client.GetNotificationEvents(2, 1, &events));
+  ASSERT_EQ(1, events.size()) << "events: " << events;
+  EXPECT_EQ("ALTER_TABLE", events[0].eventType);
+  ASSERT_OK(client.Stop());
+}
+
+TEST_F(HmsClientTest, TestDeserializeJsonTable) {
+  string json = R"#({"1":{"str":"table_name"},"2":{"str":"database_name"}})#";
+  hive::Table table;
+  ASSERT_OK(HmsClient::DeserializeJsonTable(json, &table));
+  ASSERT_EQ("table_name", table.tableName);
+  ASSERT_EQ("database_name", table.dbName);
+}
+
+} // namespace hms
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/src/kudu/hms/hms_client.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client.cc b/src/kudu/hms/hms_client.cc
new file mode 100644
index 0000000..2ee87a8
--- /dev/null
+++ b/src/kudu/hms/hms_client.cc
@@ -0,0 +1,250 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/hms/hms_client.h"
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <glog/logging.h>
+#include <thrift/Thrift.h>
+#include <thrift/protocol/TBinaryProtocol.h>
+#include <thrift/protocol/TJSONProtocol.h>
+#include <thrift/transport/TBufferTransports.h>
+#include <thrift/transport/TSocket.h>
+
+#include "kudu/gutil/strings/split.h"
+#include "kudu/gutil/strings/strip.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/hms/ThriftHiveMetastore.h"
+#include "kudu/hms/hive_metastore_constants.h"
+#include "kudu/util/net/net_util.h"
+#include "kudu/util/status.h"
+#include "kudu/util/stopwatch.h"
+
+using apache::thrift::TException;
+using apache::thrift::protocol::TBinaryProtocol;
+using apache::thrift::protocol::TJSONProtocol;
+using apache::thrift::transport::TBufferedTransport;
+using apache::thrift::transport::TMemoryBuffer;
+using apache::thrift::transport::TSocket;
+using std::make_shared;
+using std::shared_ptr;
+using std::string;
+using std::vector;
+
+namespace kudu {
+namespace hms {
+
+// The entire set of Hive-specific exceptions is defined in
+// hive_metastore.thrift. We do not try to handle all of them - TException acts
+// as a catch-all, as well as the default for network errors.
+#define HMS_RET_NOT_OK(call, msg) \
+  try { \
+    (call); \
+  } catch (const hive::AlreadyExistsException& e) { \
+    return Status::AlreadyPresent((msg), e.what()); \
+  } catch (const hive::UnknownDBException& e) { \
+    return Status::NotFound((msg), e.what()); \
+  } catch (const hive::UnknownTableException& e) { \
+    return Status::NotFound((msg), e.what()); \
+  } catch (const hive::NoSuchObjectException& e) { \
+    return Status::NotFound((msg), e.what()); \
+  } catch (const hive::InvalidObjectException& e) { \
+    return Status::InvalidArgument((msg), e.what()); \
+  } catch (const hive::InvalidOperationException& e) { \
+    return Status::IllegalState((msg), e.what()); \
+  } catch (const hive::MetaException& e) { \
+    return Status::RuntimeError((msg), e.what()); \
+  } catch (const TException& e) { \
+    return Status::IOError((msg), e.what()); \
+  }
+
+const char* const HmsClient::kKuduTableIdKey = "kudu.table_id";
+const char* const HmsClient::kKuduMasterAddrsKey = "kudu.master_addresses";
+const char* const HmsClient::kKuduStorageHandler = "org.apache.kudu.hive.KuduStorageHandler";
+
+const char* const HmsClient::kTransactionalEventListeners =
+  "hive.metastore.transactional.event.listeners";
+const char* const HmsClient::kDbNotificationListener =
+  "org.apache.hive.hcatalog.listener.DbNotificationListener";
+const char* const HmsClient::kKuduMetastorePlugin =
+  "org.apache.kudu.hive.metastore.KuduMetastorePlugin";
+
+const int kSlowExecutionWarningThresholdMs = 500;
+
+HmsClient::HmsClient(const HostPort& hms_address)
+    : client_(nullptr) {
+  auto socket = make_shared<TSocket>(hms_address.host(), hms_address.port());
+  auto transport = make_shared<TBufferedTransport>(std::move(socket));
+  auto protocol = make_shared<TBinaryProtocol>(std::move(transport));
+  client_ = hive::ThriftHiveMetastoreClient(std::move(protocol));
+}
+
+HmsClient::~HmsClient() {
+  WARN_NOT_OK(Stop(), "failed to shutdown HMS client");
+}
+
+Status HmsClient::Start() {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, 1000 /* ms */, "starting HMS client");
+  HMS_RET_NOT_OK(client_.getOutputProtocol()->getTransport()->open(),
+                 "failed to open Hive MetaStore connection");
+
+  // Immediately after connecting to the HMS, check that it is configured with
+  // the required event listeners.
+  string event_listener_config;
+  HMS_RET_NOT_OK(client_.get_config_value(event_listener_config, kTransactionalEventListeners, ""),
+                 "failed to get Hive MetaStore transactional event listener configuration");
+
+  // Parse the set of listeners from the configuration string.
+  vector<string> listeners = strings::Split(event_listener_config, ",", strings::SkipWhitespace());
+  for (auto& listener : listeners) {
+    StripWhiteSpace(&listener);
+  }
+
+  for (const auto& required_listener : { kDbNotificationListener, kKuduMetastorePlugin }) {
+    if (std::find(listeners.begin(), listeners.end(), required_listener) == listeners.end()) {
+      return Status::IllegalState(
+          strings::Substitute("Hive Metastore configuration is missing required "
+                              "transactional event listener ($0): $1",
+                              kTransactionalEventListeners, required_listener));
+    }
+  }
+
+  return Status::OK();
+}
+
+Status HmsClient::Stop() {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "stopping HMS client");
+  HMS_RET_NOT_OK(client_.getInputProtocol()->getTransport()->close(),
+                 "failed to close Hive MetaStore connection");
+  return Status::OK();
+}
+
+Status HmsClient::CreateDatabase(const hive::Database& database) {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "create HMS database");
+  HMS_RET_NOT_OK(client_.create_database(database), "failed to create Hive MetaStore database");
+  return Status::OK();
+}
+
+Status HmsClient::DropDatabase(const string& database_name, Cascade cascade) {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "drop HMS database");
+  HMS_RET_NOT_OK(client_.drop_database(database_name, true, cascade == Cascade::kTrue),
+                 "failed to drop Hive MetaStore database");
+  return Status::OK();
+}
+
+Status HmsClient::GetAllDatabases(vector<string>* databases) {
+  DCHECK(databases);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "get all HMS databases");
+  HMS_RET_NOT_OK(client_.get_all_databases(*databases),
+                 "failed to get Hive MetaStore databases");
+  return Status::OK();
+}
+
+Status HmsClient::GetDatabase(const string& pattern, hive::Database* database) {
+  DCHECK(database);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "get HMS database");
+  HMS_RET_NOT_OK(client_.get_database(*database, pattern),
+                 "failed to get Hive MetaStore database");
+  return Status::OK();
+}
+
+Status HmsClient::CreateTable(const hive::Table& table) {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "create HMS table");
+  HMS_RET_NOT_OK(client_.create_table(table), "failed to create Hive MetaStore table");
+  return Status::OK();
+}
+
+Status HmsClient::AlterTable(const std::string& database_name,
+                             const std::string& table_name,
+                             const hive::Table& table) {
+  HMS_RET_NOT_OK(client_.alter_table(database_name, table_name, table),
+                 "failed to alter Hive MetaStore table");
+  return Status::OK();
+}
+
+Status HmsClient::DropTableWithContext(const string& database_name,
+                                       const string& table_name,
+                                       const hive::EnvironmentContext& env_ctx) {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "drop HMS table");
+  HMS_RET_NOT_OK(client_.drop_table_with_environment_context(database_name, table_name,
+                                                             true, env_ctx),
+                 "failed to drop Hive MetaStore table");
+  return Status::OK();
+}
+
+Status HmsClient::GetAllTables(const string& database_name,
+                               vector<string>* tables) {
+  DCHECK(tables);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "get all HMS tables");
+  HMS_RET_NOT_OK(client_.get_all_tables(*tables, database_name),
+                 "failed to get Hive MetaStore tables");
+  return Status::OK();
+}
+
+Status HmsClient::GetTable(const string& database_name,
+                           const string& table_name,
+                           hive::Table* table) {
+  DCHECK(table);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "get HMS table");
+  HMS_RET_NOT_OK(client_.get_table(*table, database_name, table_name),
+                 "failed to get Hive MetaStore table");
+  return Status::OK();
+}
+
+Status HmsClient::GetCurrentNotificationEventId(int64_t* event_id) {
+  DCHECK(event_id);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs,
+                            "get HMS current notification event ID");
+  hive::CurrentNotificationEventId response;
+  HMS_RET_NOT_OK(client_.get_current_notificationEventId(response),
+                 "failed to get Hive MetaStore current event ID");
+  *event_id = response.eventId;
+  return Status::OK();
+}
+
+Status HmsClient::GetNotificationEvents(int64_t last_event_id,
+                                        int32_t max_events,
+                                        vector<hive::NotificationEvent>* events) {
+  DCHECK(events);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs,
+                            "get HMS notification events");
+  hive::NotificationEventRequest request;
+  request.lastEvent = last_event_id;
+  request.__set_maxEvents(max_events);
+  hive::NotificationEventResponse response;
+  HMS_RET_NOT_OK(client_.get_next_notification(response, request),
+                 "failed to get Hive MetaStore next notification");
+  events->swap(response.events);
+  return Status::OK();
+}
+
+Status HmsClient::DeserializeJsonTable(Slice json, hive::Table* table)  {
+  shared_ptr<TMemoryBuffer> membuffer(new TMemoryBuffer(json.size()));
+  membuffer->write(json.data(), json.size());
+  TJSONProtocol protocol(membuffer);
+  HMS_RET_NOT_OK(table->read(&protocol), "failed to deserialize JSON table");
+  return Status::OK();
+}
+
+} // namespace hms
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/src/kudu/hms/hms_client.h
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client.h b/src/kudu/hms/hms_client.h
new file mode 100644
index 0000000..a5ed09f
--- /dev/null
+++ b/src/kudu/hms/hms_client.h
@@ -0,0 +1,149 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector> // IWYU pragma: keep
+
+#include "kudu/gutil/port.h"
+#include "kudu/hms/ThriftHiveMetastore.h"
+#include "kudu/hms/hive_metastore_types.h"
+#include "kudu/util/slice.h"
+#include "kudu/util/status.h"
+
+namespace hive = Apache::Hadoop::Hive;
+
+namespace kudu {
+
+class HostPort;
+
+namespace hms {
+
+// Whether to drop child-objects when dropping an HMS object.
+enum class Cascade {
+  kTrue,
+  kFalse,
+};
+
+// A client for the Hive MetaStore.
+//
+// All operations are synchronous, and may block.
+//
+// HmsClient is not thread safe.
+//
+// TODO(dan): this client is lacking adequate failure handling, including:
+//  - Documentation of specific Status codes returned in error scenarios
+//  - Connection timeouts
+//  - Handling and/or documentation of retry and reconnection behavior
+//
+// TODO(dan): this client does not handle HA (multi) HMS deployments.
+//
+// TODO(dan): this client does not handle Kerberized HMS deployments.
+class HmsClient {
+ public:
+
+  static const char* const kKuduTableIdKey;
+  static const char* const kKuduMasterAddrsKey;
+  static const char* const kKuduStorageHandler;
+
+  static const char* const kTransactionalEventListeners;
+  static const char* const kDbNotificationListener;
+  static const char* const kKuduMetastorePlugin;
+
+  explicit HmsClient(const HostPort& hms_address);
+  ~HmsClient();
+
+  // Starts the HMS client.
+  //
+  // This method will open a synchronous TCP connection to the HMS. If the HMS
+  // can not be reached, an error is returned.
+  //
+  // Must be called before any subsequent operations using the client.
+  Status Start() WARN_UNUSED_RESULT;
+
+  // Stops the HMS client.
+  //
+  // This is optional; if not called the destructor will stop the client.
+  Status Stop() WARN_UNUSED_RESULT;
+
+  // Creates a new database in the HMS.
+  //
+  // If a database already exists by the same name an AlreadyPresent status is
+  // returned.
+  Status CreateDatabase(const hive::Database& database) WARN_UNUSED_RESULT;
+
+  // Drops a database in the HMS.
+  //
+  // If 'cascade' is Cascade::kTrue, tables in the database will automatically
+  // be dropped (this is the default in HiveQL). Otherwise, the operation will
+  // return IllegalState if the database contains tables.
+  Status DropDatabase(const std::string& database_name,
+                      Cascade cascade = Cascade::kFalse) WARN_UNUSED_RESULT;
+
+  // Returns all HMS databases.
+  Status GetAllDatabases(std::vector<std::string>* databases) WARN_UNUSED_RESULT;
+
+  // Retrieves a database from the HMS.
+  Status GetDatabase(const std::string& pattern, hive::Database* database) WARN_UNUSED_RESULT;
+
+  // Creates a table in the HMS.
+  Status CreateTable(const hive::Table& table) WARN_UNUSED_RESULT;
+
+  // Alter a table in the HMS.
+  Status AlterTable(const std::string& database_name,
+                    const std::string& table_name,
+                    const hive::Table& table) WARN_UNUSED_RESULT;
+
+  // Drops a Kudu table in the HMS.
+  Status DropTableWithContext(const std::string& database_name,
+                              const std::string& table_name,
+                              const hive::EnvironmentContext& env_ctx) WARN_UNUSED_RESULT;
+
+  // Retrieves an HMS table's metadata.
+  Status GetTable(const std::string& database_name,
+                  const std::string& table_name,
+                  hive::Table* table) WARN_UNUSED_RESULT;
+
+  // Retrieves all tables in an HMS database.
+  Status GetAllTables(const std::string& database_name,
+                      std::vector<std::string>* tables) WARN_UNUSED_RESULT;
+
+  // Retrieves the current HMS notification event ID.
+  Status GetCurrentNotificationEventId(int64_t* event_id) WARN_UNUSED_RESULT;
+
+  // Retrieves HMS notification log events, beginning after 'last_event_id'.
+  Status GetNotificationEvents(int64_t last_event_id,
+                               int32_t max_events,
+                               std::vector<hive::NotificationEvent>* events) WARN_UNUSED_RESULT;
+
+  // Deserializes a JSON encoded table.
+  //
+  // Notification event log messages often include table objects serialized as
+  // JSON.
+  //
+  // See org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory for
+  // the Java equivalent.
+  static Status DeserializeJsonTable(Slice json, hive::Table* table) WARN_UNUSED_RESULT;
+
+ private:
+  hive::ThriftHiveMetastoreClient client_;
+};
+
+} // namespace hms
+} // namespace kudu

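For reference, a minimal usage sketch of the synchronous client declared above
(illustrative only, not part of this patch). It assumes a MiniHms can be
started locally, as in hms_client-test.cc earlier in this commit, and it
abbreviates error handling.

#include <iostream>
#include <string>
#include <vector>

#include "kudu/hms/hive_metastore_types.h"
#include "kudu/hms/hms_client.h"
#include "kudu/hms/mini_hms.h"
#include "kudu/util/status.h"

using kudu::Status;
using kudu::hms::HmsClient;
using kudu::hms::MiniHms;

int main() {
  // Start a local Hive MetaStore for the demo; in a real deployment the
  // client would be pointed at an existing HMS address instead.
  MiniHms hms;
  Status s = hms.Start();
  if (!s.ok()) {
    std::cerr << "failed to start mini HMS: " << s.ToString() << std::endl;
    return 1;
  }

  // Connect and issue a couple of metadata operations, mirroring the
  // integration test earlier in this commit.
  HmsClient client(hms.address());
  s = client.Start();
  if (!s.ok()) {
    std::cerr << "failed to connect to HMS: " << s.ToString() << std::endl;
    return 1;
  }

  hive::Database db;
  db.name = "example_db";
  s = client.CreateDatabase(db);
  if (!s.ok() && !s.IsAlreadyPresent()) {
    std::cerr << "create database failed: " << s.ToString() << std::endl;
    return 1;
  }

  std::vector<std::string> databases;
  if (client.GetAllDatabases(&databases).ok()) {
    for (const auto& name : databases) {
      std::cout << name << std::endl;
    }
  }

  s = client.Stop();
  return s.ok() ? 0 : 1;
}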
http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/src/kudu/hms/mini_hms.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/mini_hms.cc b/src/kudu/hms/mini_hms.cc
new file mode 100644
index 0000000..392033b
--- /dev/null
+++ b/src/kudu/hms/mini_hms.cc
@@ -0,0 +1,177 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/hms/mini_hms.h"
+
+#include <algorithm>
+#include <csignal>
+#include <cstdlib>
+#include <map>
+#include <memory>
+#include <ostream>
+#include <string>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/env.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/path_util.h"
+#include "kudu/util/slice.h"
+#include "kudu/util/status.h"
+#include "kudu/util/stopwatch.h"
+#include "kudu/util/string_case.h"
+#include "kudu/util/subprocess.h"
+#include "kudu/util/test_util.h"
+
+using std::map;
+using std::string;
+using std::unique_ptr;
+using strings::Substitute;
+
+namespace kudu {
+namespace hms {
+
+MiniHms::~MiniHms() {
+  if (hms_process_) {
+    VLOG(1) << "Stopping HMS";
+    unique_ptr<Subprocess> proc = std::move(hms_process_);
+    WARN_NOT_OK(proc->KillAndWait(SIGTERM), "failed to stop the Hive MetaStore process");
+  }
+}
+
+namespace {
+
+Status FindHomeDir(const char* name, const string& bin_dir, string* home_dir) {
+  string name_upper;
+  ToUpperCase(name, &name_upper);
+
+  string env_var = Substitute("$0_HOME", name_upper);
+  const char* env = std::getenv(env_var.c_str());
+  *home_dir = env == nullptr ? JoinPathSegments(bin_dir, Substitute("$0-home", name)) : env;
+
+  if (!Env::Default()->FileExists(*home_dir)) {
+    return Status::NotFound(Substitute("$0 directory does not exist", env_var), *home_dir);
+  }
+  return Status::OK();
+}
+
+} // anonymous namespace
+
+Status MiniHms::Start() {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, 20000, "Starting HMS");
+  CHECK(!hms_process_);
+
+  VLOG(1) << "Starting HMS";
+
+  Env* env = Env::Default();
+
+  string exe;
+  RETURN_NOT_OK(env->GetExecutablePath(&exe));
+  const string bin_dir = DirName(exe);
+
+  string hadoop_home;
+  string hive_home;
+  string java_home;
+  RETURN_NOT_OK(FindHomeDir("hadoop", bin_dir, &hadoop_home));
+  RETURN_NOT_OK(FindHomeDir("hive", bin_dir, &hive_home));
+  RETURN_NOT_OK(FindHomeDir("java", bin_dir, &java_home));
+
+  auto tmp_dir = GetTestDataDirectory();
+
+  RETURN_NOT_OK(CreateHiveSite(tmp_dir));
+
+  // Comma-separated list of additional jars to add to the HMS classpath.
+  string aux_jars = Substitute("$0/hcatalog/share/hcatalog,$1/hms-plugin.jar",
+                               hive_home, bin_dir);
+  map<string, string> env_vars {
+      { "JAVA_HOME", java_home },
+      { "HADOOP_HOME", hadoop_home },
+      { "HIVE_AUX_JARS_PATH", aux_jars },
+      { "HIVE_CONF_DIR", tmp_dir },
+  };
+
+  // Start the HMS.
+  hms_process_.reset(new Subprocess({
+        Substitute("$0/bin/hive", hive_home),
+        "--service", "metastore",
+        "-v",
+        "-p", "0", // Use an ephemeral port.
+  }));
+
+  hms_process_->SetEnvVars(env_vars);
+  RETURN_NOT_OK(hms_process_->Start());
+
+  // Wait for HMS to start listening on its ports and commencing operation.
+  VLOG(1) << "Waiting for HMS ports";
+  return WaitForTcpBind(hms_process_->pid(), &port_, MonoDelta::FromSeconds(20));
+}
+
+Status MiniHms::CreateHiveSite(const string& tmp_dir) const {
+  // 'datanucleus.schema.autoCreateAll' and 'hive.metastore.schema.verification'
+  // allow Hive to startup and run without first running the schemaTool.
+  // 'hive.metastore.event.db.listener.timetolive' configures how long the
+  // Metastore will store notification log events before GCing them.
+  static const string kFileTemplate = R"(
+<configuration>
+  <property>
+    <name>hive.metastore.transactional.event.listeners</name>
+    <value>
+      org.apache.hive.hcatalog.listener.DbNotificationListener,
+      org.apache.kudu.hive.metastore.KuduMetastorePlugin
+    </value>
+  </property>
+
+  <property>
+    <name>datanucleus.schema.autoCreateAll</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>hive.metastore.schema.verification</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <name>hive.metastore.warehouse.dir</name>
+    <value>file://$1/warehouse/</value>
+  </property>
+
+  <property>
+    <name>javax.jdo.option.ConnectionURL</name>
+    <value>jdbc:derby:memory:$1/metadb;create=true</value>
+  </property>
+
+  <property>
+    <name>hive.metastore.event.db.listener.timetolive</name>
+    <value>$0s</value>
+  </property>
+
+</configuration>
+  )";
+
+  string file_contents = strings::Substitute(kFileTemplate,
+                                             notification_log_ttl_.ToSeconds(),
+                                             tmp_dir);
+
+  return WriteStringToFile(Env::Default(),
+                           file_contents,
+                           JoinPathSegments(tmp_dir, "hive-site.xml"));
+}
+
+} // namespace hms
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/src/kudu/hms/mini_hms.h
----------------------------------------------------------------------
diff --git a/src/kudu/hms/mini_hms.h b/src/kudu/hms/mini_hms.h
new file mode 100644
index 0000000..2e5ac30
--- /dev/null
+++ b/src/kudu/hms/mini_hms.h
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/port.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/net/net_util.h"
+#include "kudu/util/status.h"
+#include "kudu/util/subprocess.h" // IWYU pragma: keep
+
+namespace kudu {
+namespace hms {
+
+class MiniHms {
+ public:
+
+  MiniHms() = default;
+
+  ~MiniHms();
+
+  // Configures the notification log TTL. Must be called before Start().
+  void SetNotificationLogTtl(MonoDelta ttl) {
+    CHECK(!hms_process_);
+    notification_log_ttl_ = ttl;
+  }
+
+  // Starts the mini Hive metastore.
+  Status Start() WARN_UNUSED_RESULT;
+
+  // Returns the address of the Hive metastore. Should only be called after the
+  // metastore is started.
+  HostPort address() const {
+    return HostPort("127.0.0.1", port_);
+  }
+
+ private:
+
+  // Creates a hive-site.xml for the mini HMS.
+  Status CreateHiveSite(const std::string& tmp_dir) const WARN_UNUSED_RESULT;
+
+  // Waits for the metastore process to bind to a port.
+  Status WaitForHmsPorts() WARN_UNUSED_RESULT;
+
+  std::unique_ptr<Subprocess> hms_process_;
+  MonoDelta notification_log_ttl_ = MonoDelta::FromSeconds(86400);
+  uint16_t port_ = 0;
+};
+
+} // namespace hms
+} // namespace kudu
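
For context, a minimal sketch (not part of this patch) of how a test could drive the class above, using only the members declared in mini_hms.h plus Kudu's usual gtest helpers (ASSERT_OK from kudu/util/test_macros.h):

    #include <gtest/gtest.h>

    #include "kudu/hms/mini_hms.h"
    #include "kudu/util/net/net_util.h"
    #include "kudu/util/test_macros.h"

    namespace kudu {

    TEST(MiniHmsExampleTest, StartAndResolveAddress) {
      hms::MiniHms hms;
      // Start() launches the metastore subprocess and waits for it to bind a port.
      ASSERT_OK(hms.Start());
      // address() reports 127.0.0.1 plus whatever ephemeral port the HMS bound.
      HostPort hp = hms.address();
      ASSERT_NE(0, hp.port());
    }

    } // namespace kudu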

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/thirdparty/LICENSE.txt
----------------------------------------------------------------------
diff --git a/thirdparty/LICENSE.txt b/thirdparty/LICENSE.txt
index 623fd46..b229691 100644
--- a/thirdparty/LICENSE.txt
+++ b/thirdparty/LICENSE.txt
@@ -619,7 +619,9 @@ Source: http://www.boost.org/
   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   DEALINGS IN THE SOFTWARE.
 
-
+--------------------------------------------------------------------------------
+thirdparty/thrift-*/: Apache License v2.0
+Source: https://thrift.apache.org
 
 ================================================================================
 BUILD-ONLY DEPENDENCIES
@@ -655,3 +657,17 @@ thirdparty/python-*: Python 2.7 license (https://www.python.org/download/release
 Source: http://www.python.org/
 NOTE: optional build-time dependency, not linked or bundled.
 
+--------------------------------------------------------------------------------
+thirdparty/bison: GPLv3 license (https://www.gnu.org/licenses/gpl.html)
+Source: https://www.gnu.org/software/bison
+NOTE: build-time dependency
+
+--------------------------------------------------------------------------------
+thirdparty/hadoop-*/: Apache License v2.0
+Source: https://hadoop.apache.org
+NOTE: build-time dependency
+
+--------------------------------------------------------------------------------
+thirdparty/hive-*/: Apache License v2.0
+Source: https://hive.apache.org
+NOTE: build-time dependency

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/thirdparty/build-definitions.sh
----------------------------------------------------------------------
diff --git a/thirdparty/build-definitions.sh b/thirdparty/build-definitions.sh
index e5b8045..e38d54a 100644
--- a/thirdparty/build-definitions.sh
+++ b/thirdparty/build-definitions.sh
@@ -728,3 +728,59 @@ build_sparsepp() {
   rsync -av --delete sparsepp/ $PREFIX/include/sparsepp/
   popd
 }
+
+build_thrift() {
+  THRIFT_BDIR=$TP_BUILD_DIR/$THRIFT_NAME$MODE_SUFFIX
+  mkdir -p $THRIFT_BDIR
+  pushd $THRIFT_BDIR
+  rm -Rf CMakeCache.txt CMakeFiles/
+
+  # Thrift depends on bison.
+  #
+  # Configure for a very minimal install - only the C++ client libraries are needed.
+  # Thrift requires C++11 when compiled on Linux against libc++ (see cxxfunctional.h).
+  CFLAGS="$EXTRA_CFLAGS" \
+    CXXFLAGS="$EXTRA_CXXFLAGS -std=c++11" \
+    LDFLAGS="$EXTRA_LDFLAGS" \
+    LIBS="$EXTRA_LIBS" \
+    cmake \
+    -DBOOST_ROOT=$PREFIX \
+    -DBUILD_C_GLIB=OFF \
+    -DBUILD_COMPILER=ON \
+    -DBUILD_CPP=ON \
+    -DBUILD_EXAMPLES=OFF \
+    -DBUILD_HASKELL=OFF \
+    -DBUILD_JAVA=OFF \
+    -DBUILD_PYTHON=OFF \
+    -DBUILD_TESTING=OFF \
+    -DBUILD_TUTORIALS=OFF \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_INSTALL_PREFIX=$PREFIX \
+    -DWITH_BOOSTTHREADS=OFF \
+    -DWITH_LIBEVENT=OFF \
+    -DWITH_OPENSSL=OFF \
+    -DWITH_PLUGIN=OFF \
+    $EXTRA_CMAKE_FLAGS \
+    $THRIFT_SOURCE
+
+  ${NINJA:-make} -j$PARALLEL $EXTRA_MAKEFLAGS install
+  popd
+
+  # Install fb303.thrift into the share directory.
+  mkdir -p $PREFIX/share/fb303/if
+  cp $THRIFT_SOURCE/contrib/fb303/if/fb303.thrift $PREFIX/share/fb303/if
+}
+
+build_bison() {
+  BISON_BDIR=$TP_BUILD_DIR/$BISON_NAME$MODE_SUFFIX
+  mkdir -p $BISON_BDIR
+  pushd $BISON_BDIR
+  CFLAGS="$EXTRA_CFLAGS" \
+    CXXFLAGS="$EXTRA_CXXFLAGS" \
+    LDFLAGS="$EXTRA_LDFLAGS" \
+    LIBS="$EXTRA_LIBS" \
+    $BISON_SOURCE/configure \
+    --prefix=$PREFIX
+  make -j$PARALLEL $EXTRA_MAKEFLAGS install
+  popd
+}

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/thirdparty/build-thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index 962981b..e239984 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -95,6 +95,10 @@ else
       "breakpad")     F_BREAKPAD=1 ;;
       "sparsehash")   F_SPARSEHASH=1 ;;
       "sparsepp")     F_SPARSEPP=1 ;;
+      "thrift")       F_THRIFT=1 ;;
+      "bison")        F_BISON=1 ;;
+      "hadoop")       F_HADOOP=1 ;;
+      "hive")         F_HIVE=1 ;;
       *)              echo "Unknown module: $arg"; exit 1 ;;
     esac
   done
@@ -237,6 +241,22 @@ if [ -n "$F_COMMON" -o -n "$F_SPARSEPP" ]; then
   build_sparsepp
 fi
 
+if [ -n "$F_COMMON" -o -n "$F_BISON" ]; then
+  build_bison
+fi
+
+# Install Hadoop and Hive by symlinking their source directories (which are
+# pre-built) into $PREFIX/opt.
+if [ -n "$F_COMMON" -o -n "$F_HADOOP" ]; then
+  mkdir -p $PREFIX/opt
+  ln -nsf $HADOOP_SOURCE $PREFIX/opt/hadoop
+fi
+
+if [ -n "$F_COMMON" -o -n "$F_HIVE" ]; then
+  mkdir -p $PREFIX/opt
+  ln -nsf $HIVE_SOURCE $PREFIX/opt/hive
+fi
+
 ### Build C dependencies without instrumentation
 
 PREFIX=$PREFIX_DEPS
@@ -338,6 +358,10 @@ if [ -n "$F_UNINSTRUMENTED" -o -n "$F_BREAKPAD" ]; then
   build_breakpad
 fi
 
+if [ -n "$F_UNINSTRUMENTED" -o -n "$F_THRIFT" ]; then
+  build_thrift
+fi
+
 restore_env
 
 # If we're on macOS best to exit here, otherwise single dependency builds will try to
@@ -509,6 +533,10 @@ if [ -n "$F_TSAN" -o -n "$F_BREAKPAD" ]; then
   build_breakpad
 fi
 
+if [ -n "$F_TSAN" -o -n "$F_THRIFT" ]; then
+  build_thrift
+fi
+
 restore_env
 
 finish

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/thirdparty/download-thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh
index fc94910..d7f85bf 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -340,6 +340,39 @@ if [ ! -d "$SPARSEPP_SOURCE" ]; then
   popd
 fi
 
+THRIFT_PATCHLEVEL=0
+if [ ! -d "$THRIFT_SOURCE" ]; then
+  fetch_and_expand $THRIFT_NAME.tar.gz
+  pushd $THRIFT_SOURCE
+  touch patchlevel-$THRIFT_PATCHLEVEL
+  popd
+fi
+
+BISON_PATCHLEVEL=0
+if [ ! -d "$BISON_SOURCE" ]; then
+  fetch_and_expand $BISON_NAME.tar.gz
+  # This step would normally run autoreconf, but autoreconf 2.69 (RHEL 7) fails
+  # with: "autoreconf: 'configure.ac' or 'configure.in' is required".
+  pushd $BISON_SOURCE
+  touch patchlevel-$BISON_PATCHLEVEL
+  popd
+fi
+
+HIVE_PATCHLEVEL=0
+if [ ! -d "$HIVE_SOURCE" ]; then
+  fetch_and_expand $HIVE_NAME.tar.gz
+  pushd $HIVE_SOURCE
+  touch patchlevel-$HIVE_PATCHLEVEL
+  popd
+fi
+
+HADOOP_PATCHLEVEL=0
+if [ ! -d "$HADOOP_SOURCE" ]; then
+  fetch_and_expand $HADOOP_NAME.tar.gz
+  pushd $HADOOP_SOURCE
+  touch patchlevel-$HADOOP_PATCHLEVEL
+  popd
+fi
 
 echo "---------------"
 echo "Thirdparty dependencies downloaded successfully"

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/thirdparty/vars.sh
----------------------------------------------------------------------
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 7fe0e0e..43f3d84 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -199,3 +199,27 @@ SPARSEHASH_SOURCE=$TP_SOURCE_DIR/$SPARSEHASH_NAME
 SPARSEPP_VERSION=824860bb76893d163efbcff330734b9f62eecb17
 SPARSEPP_NAME=sparsepp-$SPARSEPP_VERSION
 SPARSEPP_SOURCE=$TP_SOURCE_DIR/$SPARSEPP_NAME
+
+# TODO(dan): bump to 0.11 when it's released. We chose to use a bleeding edge
+# version instead of 0.10 in order to get the API and header inclusion
+# simplifications introduced in THRIFT-2221 (Thrift's previous use of tr1
+# conflicted with gtest's use of tuples).
+THRIFT_VERSION=8b8a8efea13d1c97f856053af0a5c0e6a8a76354
+THRIFT_NAME=thrift-$THRIFT_VERSION
+THRIFT_SOURCE=$TP_SOURCE_DIR/$THRIFT_NAME
+
+BISON_VERSION=3.0.4
+BISON_NAME=bison-$BISON_VERSION
+BISON_SOURCE=$TP_SOURCE_DIR/$BISON_NAME
+
+# TODO(dan): bump to a release version once HIVE-17747 is published.
+HIVE_VERSION=3fb4649fa847cfec33f701f6c884f12087680cf0
+HIVE_NAME=apache-hive-$HIVE_VERSION-bin
+HIVE_SOURCE=$TP_SOURCE_DIR/$HIVE_NAME
+
+# The Hadoop tarball is the binary release tarball from hadoop.apache.org, with
+# the share/doc folder removed. The share/doc folder is about 2GiB of HTML
+# Javadoc files.
+HADOOP_VERSION=2.8.1
+HADOOP_NAME=hadoop-$HADOOP_VERSION
+HADOOP_SOURCE=$TP_SOURCE_DIR/$HADOOP_NAME


[4/5] kudu git commit: KUDU-2191 (2/n): Hive Metastore client

Posted by to...@apache.org.
KUDU-2191 (2/n): Hive Metastore client

This patch lays the groundwork for integrating the Kudu catalog with the
Hive MetaStore.

The focus of this patch is a Kudu-specific C++ HMS client
(hms_client.[h|cc]) in a new hms module. This client provides bindings
for the Hive Metastore APIs that Kudu will use in follow-up commits; a
short usage sketch of the generated bindings follows the file listing below.

- Thrift has been added as a dependency, along with a mechanism for
  performing Thrift codegen at compile time (see FindThrift.cmake,
  based on FindProtobuf.cmake).

- Bison has been added as a build-time dependency, because the system
  bison version on RHEL 6 is not new enough for Thrift 0.10.

- Hive and Hadoop have been added to thirdparty as test-only dependencies.

- A Hive MetaStore external mini server is included for testing. See
  mini_hms.[h|cc].

- The Kudu Metastore plugin is compiled by CMake into a standalone jar
  for loading into the HMS mini server.

Change-Id: I155223da912bc18a759df2f1f6bc25d1132a99ee
Reviewed-on: http://gerrit.cloudera.org:8080/7053
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/31d16f74
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/31d16f74
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/31d16f74

Branch: refs/heads/master
Commit: 31d16f7493544c850e019f1d2fdc7eef6e2c2258
Parents: d8c39d2
Author: Dan Burkert <da...@apache.org>
Authored: Thu Apr 20 13:16:33 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Fri Nov 3 00:07:53 2017 +0000

----------------------------------------------------------------------
 CMakeLists.txt                     |   29 +-
 build-support/dist_test.py         |   16 +-
 build-support/iwyu/iwyu-filter.awk |    1 +
 build-support/run_dist_test.py     |    7 +
 cmake_modules/FindJavaHome.cmake   |   94 ++
 cmake_modules/FindThrift.cmake     |  161 ++++
 src/kudu/hms/CMakeLists.txt        |   77 ++
 src/kudu/hms/hive_metastore.thrift | 1536 +++++++++++++++++++++++++++++++
 src/kudu/hms/hms_client-test.cc    |  208 +++++
 src/kudu/hms/hms_client.cc         |  250 +++++
 src/kudu/hms/hms_client.h          |  149 +++
 src/kudu/hms/mini_hms.cc           |  177 ++++
 src/kudu/hms/mini_hms.h            |   71 ++
 thirdparty/LICENSE.txt             |   18 +-
 thirdparty/build-definitions.sh    |   56 ++
 thirdparty/build-thirdparty.sh     |   28 +
 thirdparty/download-thirdparty.sh  |   33 +
 thirdparty/vars.sh                 |   24 +
 18 files changed, 2928 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
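
The hms_client.[h|cc] wrapper is not reproduced in this message, but for orientation, here is a minimal sketch of what using the Thrift-generated C++ bindings for the ThriftHiveMetastore service (see hive_metastore.thrift below) looks like directly. The host and port are assumptions (MiniHms picks an ephemeral port), and depending on how Thrift is built the generated client may require boost::shared_ptr rather than std::shared_ptr.

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    #include <thrift/protocol/TBinaryProtocol.h>
    #include <thrift/transport/TBufferTransports.h>
    #include <thrift/transport/TSocket.h>

    // Generated from hive_metastore.thrift by THRIFT_GENERATE_CPP; the include
    // path depends on the build setup.
    #include "ThriftHiveMetastore.h"

    using apache::thrift::protocol::TBinaryProtocol;
    using apache::thrift::transport::TBufferedTransport;
    using apache::thrift::transport::TSocket;

    int main() {
      // 9083 is the conventional HMS port; it is an assumption for this sketch.
      auto socket = std::make_shared<TSocket>("127.0.0.1", 9083);
      auto transport = std::make_shared<TBufferedTransport>(socket);
      auto protocol = std::make_shared<TBinaryProtocol>(transport);
      Apache::Hadoop::Hive::ThriftHiveMetastoreClient client(protocol);

      transport->open();
      std::vector<std::string> databases;
      client.get_all_databases(databases);  // declared in the Thrift service below
      for (const auto& db : databases) {
        std::cout << db << std::endl;
      }
      transport->close();
      return 0;
    }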


http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 59542cf..478bf5e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -791,14 +791,18 @@ function(ADD_THIRDPARTY_LIB LIB_NAME)
   endif()
 endfunction()
 
-# Look in thirdparty prefix paths before anywhere else for system dependencies.
-set(CMAKE_PREFIX_PATH ${THIRDPARTY_INSTALL_COMMON_DIR} ${CMAKE_PREFIX_PATH})
 if (${KUDU_USE_TSAN})
-  set(CMAKE_PREFIX_PATH ${THIRDPARTY_INSTALL_TSAN_DIR} ${CMAKE_PREFIX_PATH})
+  set(THIRDPARTY_INSTALL_CURRENT_DIR ${THIRDPARTY_INSTALL_DIR}/tsan)
 else()
-  set(CMAKE_PREFIX_PATH ${THIRDPARTY_INSTALL_UNINSTRUMENTED_DIR} ${CMAKE_PREFIX_PATH})
+  set(THIRDPARTY_INSTALL_CURRENT_DIR ${THIRDPARTY_INSTALL_DIR}/uninstrumented)
 endif()
 
+# Look in thirdparty prefix paths before anywhere else for system dependencies.
+set(CMAKE_PREFIX_PATH
+    ${THIRDPARTY_INSTALL_COMMON_DIR}
+    ${THIRDPARTY_INSTALL_CURRENT_DIR}
+    ${CMAKE_PREFIX_PATH})
+
 ## Cyrus SASL
 find_package(CyrusSASL REQUIRED)
 include_directories(SYSTEM ${CYRUS_SASL_INCLUDE_DIR})
@@ -852,6 +856,22 @@ ADD_THIRDPARTY_LIB(protoc
   DEPS protobuf)
 find_package(KRPC REQUIRED)
 
+## Thrift
+find_package(Thrift REQUIRED)
+include_directories(SYSTEM ${THRIFT_INCLUDE_DIR})
+ADD_THIRDPARTY_LIB(thrift
+  STATIC_LIB "${THRIFT_STATIC_LIBRARY}"
+  SHARED_LIB "${THRIFT_SHARED_LIBRARY}")
+
+if (NOT NO_TESTS)
+  # The HMS tests rely on JAVA_HOME being set in order to run the HMS, and
+  # on JDK 1.7 or later for compiling the Kudu metastore plugin.
+  find_package(JavaHome REQUIRED)
+  find_package(Java 1.7 REQUIRED)
+  # Defines the add_jar() CMake command.
+  include(UseJava)
+endif()
+
 ## Snappy
 find_package(Snappy REQUIRED)
 include_directories(SYSTEM ${SNAPPY_INCLUDE_DIR})
@@ -1195,6 +1215,7 @@ add_subdirectory(src/kudu/experiments)
 add_subdirectory(src/kudu/fs)
 # Google util libraries borrowed from supersonic, tcmalloc, Chromium, etc.
 add_subdirectory(src/kudu/gutil)
+add_subdirectory(src/kudu/hms)
 add_subdirectory(src/kudu/integration-tests)
 add_subdirectory(src/kudu/kserver)
 add_subdirectory(src/kudu/master)

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/build-support/dist_test.py
----------------------------------------------------------------------
diff --git a/build-support/dist_test.py b/build-support/dist_test.py
index 28e4e01..a6f97d4 100755
--- a/build-support/dist_test.py
+++ b/build-support/dist_test.py
@@ -72,6 +72,16 @@ DEPS_FOR_ALL = \
 
      # Tests that require tooling require this.
      "build/latest/bin/kudu",
+
+     # The HMS tests require the Hadoop and Hive libraries. These files are just
+     # symlinks, but dist-test will copy the entire directories they point to.
+     # The symlinks themselves won't be recreated, so we point to them with
+     # environment variables in run_dist_test.py.
+     "build/latest/bin/hive-home",
+     "build/latest/bin/hadoop-home",
+
+     # Add the Kudu HMS plugin.
+     "build/latest/bin/hms-plugin.jar",
      ]
 
 # The number of shards to split tests into. This is set on a per-test basis
@@ -183,10 +193,12 @@ def ldd_deps(exe):
   If the provided 'exe' is not a binary executable, returns
   an empty list.
   """
-  if (exe.endswith(".pl") or
+  if (exe.endswith(".jar") or
+      exe.endswith(".pl") or
       exe.endswith(".py") or
       exe.endswith(".sh") or
-      exe.endswith(".txt")):
+      exe.endswith(".txt") or
+      os.path.isdir(exe)):
     return []
   p = subprocess.Popen(["ldd", exe], stdout=subprocess.PIPE)
   out, err = p.communicate()

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/build-support/iwyu/iwyu-filter.awk
----------------------------------------------------------------------
diff --git a/build-support/iwyu/iwyu-filter.awk b/build-support/iwyu/iwyu-filter.awk
index a749058..c20f2d1 100644
--- a/build-support/iwyu/iwyu-filter.awk
+++ b/build-support/iwyu/iwyu-filter.awk
@@ -90,6 +90,7 @@ BEGIN {
   muted["kudu/common/encoded_key-test.cc"]
   muted["kudu/common/schema.h"]
   muted["kudu/experiments/rwlock-perf.cc"]
+  muted["kudu/hms/hms_client.cc"]
   muted["kudu/rpc/reactor.cc"]
   muted["kudu/rpc/reactor.h"]
   muted["kudu/security/ca/cert_management.cc"]

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/build-support/run_dist_test.py
----------------------------------------------------------------------
diff --git a/build-support/run_dist_test.py b/build-support/run_dist_test.py
index 8694785..3462722 100755
--- a/build-support/run_dist_test.py
+++ b/build-support/run_dist_test.py
@@ -27,6 +27,7 @@
 # We also 'cat' the test log upon completion so that the test logs are
 # uploaded by the test slave back.
 
+import glob
 import optparse
 import os
 import re
@@ -122,6 +123,12 @@ def main():
   fixup_rpaths(os.path.join(ROOT, "build"))
   fixup_rpaths(os.path.join(ROOT, "thirdparty"))
 
+  # Add environment variables for Java dependencies. These environment variables
+  # are used in mini_hms.cc.
+  env['HIVE_HOME'] = glob.glob(os.path.join(ROOT, "thirdparty/src/apache-hive-*-bin"))[0]
+  env['HADOOP_HOME'] = glob.glob(os.path.join(ROOT, "thirdparty/src/hadoop-*"))[0]
+  env['JAVA_HOME'] = glob.glob("/usr/lib/jvm/java-1.8.0-*")[0]
+
   env['LD_LIBRARY_PATH'] = ":".join(
     [os.path.join(ROOT, "build/dist-test-system-libs/"),
      os.path.abspath(os.path.join(test_dir, "..", "lib"))])
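
A minimal sketch of the kind of lookup that consumes these variables on the C++ side; the EnvOrDefault helper is hypothetical and the real resolution logic lives in mini_hms.cc.

    #include <cstdlib>
    #include <iostream>
    #include <string>

    // Hypothetical helper: return an environment variable's value, or a fallback
    // when it is unset (e.g. the bin/hive-home symlink next to the test binary
    // in a regular, non-dist-test build).
    static std::string EnvOrDefault(const char* name, const std::string& fallback) {
      const char* val = std::getenv(name);
      return val != nullptr ? std::string(val) : fallback;
    }

    int main() {
      std::cout << "HIVE_HOME="   << EnvOrDefault("HIVE_HOME", "bin/hive-home")     << std::endl;
      std::cout << "HADOOP_HOME=" << EnvOrDefault("HADOOP_HOME", "bin/hadoop-home") << std::endl;
      std::cout << "JAVA_HOME="   << EnvOrDefault("JAVA_HOME", "bin/java-home")     << std::endl;
      return 0;
    }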

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/cmake_modules/FindJavaHome.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindJavaHome.cmake b/cmake_modules/FindJavaHome.cmake
new file mode 100644
index 0000000..4cce3bf
--- /dev/null
+++ b/cmake_modules/FindJavaHome.cmake
@@ -0,0 +1,94 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This is an adaptation of Bigtop's bigtop-detect-javahome:
+# https://github.com/apache/bigtop/blob/38e1571b2f73bbfa6ab0c01a689fae967b8399d9/bigtop-packages/src/common/bigtop-utils/bigtop-detect-javahome
+# (this is the last version to support JDK7).
+#
+# This module defines
+#  JAVA_HOME, directory containing a Java installation
+#  JAVA_HOME_FOUND, whether JAVA_HOME has been found
+
+set(JAVA_HOME_CANDIDATES
+
+    # Oracle JDK 8 Candidates
+    /usr/java/jdk1.8
+    /usr/java/jre1.8
+    /usr/lib/jvm/j2sdk1.8-oracle
+    /usr/lib/jvm/j2sdk1.8-oracle/jre
+    /usr/lib/jvm/java-8-oracle
+    /usr/lib/jdk8-latest
+
+    # OpenJDK 8 Candidates
+    /usr/lib/jvm/java-1.8.0-openjdk-amd64
+    /usr/lib/jvm/java-1.8.0-openjdk-ppc64el
+    /usr/lib/jvm/java-1.8.0-openjdk
+    /usr/lib64/jvm/java-1.8.0-openjdk-1.8.0
+
+    # Oracle JDK 7 Candidates
+    /usr/java/jdk1.7
+    /usr/java/jre1.7
+    /usr/lib/jvm/j2sdk1.7-oracle
+    /usr/lib/jvm/j2sdk1.7-oracle/jre
+    /usr/lib/jvm/java-7-oracle
+    /usr/lib/jdk7-latest
+
+    # OpenJDK 7 Candidates
+    /usr/lib/jvm/java-1.7.0-openjdk
+    /usr/lib/jvm/java-7-openjdk
+
+    # Misc. Candidates
+    /usr/java/default
+    /usr/lib/jvm/java
+    /usr/lib/jvm/jre
+    /usr/lib/jvm/default-java
+    /usr/lib/jvm/java-openjdk
+    /usr/lib/jvm/jre-openjdk)
+
+if (DEFINED ENV{JAVA_HOME})
+  set(JAVA_HOME $ENV{JAVA_HOME})
+  set(JAVA_HOME_FOUND true)
+elseif (APPLE)
+  # Use the 'java_home' finder on macOS.
+  execute_process(COMMAND /usr/libexec/java_home
+                  OUTPUT_VARIABLE JAVA_HOME
+                  RESULT_VARIABLE JAVA_HOME_ERROR
+                  OUTPUT_STRIP_TRAILING_WHITESPACE)
+  if (JAVA_HOME_ERROR)
+    message(FATAL_ERROR "Unable to run /usr/libexec/java_home: ${JAVA_HOME_ERROR}")
+  else()
+    set(JAVA_HOME_FOUND true)
+  endif()
+else()
+  foreach(CANDIDATE ${JAVA_HOME_CANDIDATES})
+    if (IS_DIRECTORY ${CANDIDATE} AND EXISTS ${CANDIDATE}/bin/java)
+      set(JAVA_HOME ${CANDIDATE})
+      set(JAVA_HOME_FOUND true)
+      break()
+    endif()
+  endforeach()
+endif()
+
+if (JAVA_HOME_FOUND AND NOT EXISTS "${JAVA_HOME}/bin/java")
+  message(FATAL_ERROR "$JAVA_HOME (${JAVA_HOME}) does not contain bin/java")
+endif()
+
+if (DEFINED JavaHome_FIND_REQUIRED AND NOT DEFINED JAVA_HOME_FOUND)
+  message(FATAL_ERROR "failed to find JAVA_HOME")
+else()
+  message("Found JAVA_HOME: ${JAVA_HOME}")
+endif()

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/cmake_modules/FindThrift.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindThrift.cmake b/cmake_modules/FindThrift.cmake
new file mode 100644
index 0000000..c741026
--- /dev/null
+++ b/cmake_modules/FindThrift.cmake
@@ -0,0 +1,161 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#########
+#
+# Locate and configure the Thrift library.
+# Defines the following variables:
+#
+#   THRIFT_INCLUDE_DIR - the include directory for thrift headers
+#   THRIFT_SHARED_LIBRARY - path to thrift's shared library
+#   THRIFT_STATIC_LIBRARY - path to thrift's static library
+#   THRIFT_EXECUTABLE - the thrift compiler
+#   THRIFT_FOUND - whether the Thrift library and executable have been found
+#
+#  ====================================================================
+#  Example:
+#
+#   find_package(Thrift REQUIRED)
+#   include_directories(${THRIFT_INCLUDE_DIR})
+#
+#   include_directories(${CMAKE_CURRENT_BINARY_DIR})
+#   THRIFT_GENERATE_CPP(THRIFT_SRCS THRIFT_HDRS THRIFT_TGTS
+#     [SOURCE_ROOT <root from which source is found>]
+#     [BINARY_ROOT <root into which binaries are built>]
+#     THRIFT_FILES foo.thrift)
+#   add_executable(bar bar.cc ${THRIFT_SRCS} ${THRIFT_HDRS})
+#   target_link_libraries(bar ${THRIFT_SHARED_LIBRARY})
+#
+#  ====================================================================
+#
+# THRIFT_GENERATE_CPP (public function)
+#   SRCS = Variable to define with autogenerated
+#          source files
+#   HDRS = Variable to define with autogenerated
+#          header files
+#   TGTS = Variable to define with autogenerated
+#          custom targets; if SRCS/HDRS need to be used in multiple
+#          libraries, those libraries should depend on these targets
+#          in order to "serialize" the thrift invocations
+#  ====================================================================
+
+function(THRIFT_GENERATE_CPP SRCS HDRS TGTS)
+  if(NOT ARGN)
+    message(SEND_ERROR "Error: THRIFT_GENERATE_CPP() called without any thrift files")
+    return()
+  endif(NOT ARGN)
+
+  set(options)
+  set(one_value_args SOURCE_ROOT BINARY_ROOT)
+  set(multi_value_args EXTRA_THRIFT_PATHS THRIFT_FILES)
+  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
+  if(ARG_UNPARSED_ARGUMENTS)
+    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
+  endif()
+
+  set(${SRCS})
+  set(${HDRS})
+  set(${TGTS})
+
+  set(EXTRA_THRIFT_PATH_ARGS)
+  foreach(PP ${ARG_EXTRA_THRIFT_PATHS})
+    set(EXTRA_THRIFT_PATH_ARGS ${EXTRA_THRIFT_PATH_ARGS} -I ${PP})
+  endforeach()
+
+  if("${ARG_SOURCE_ROOT}" STREQUAL "")
+    SET(ARG_SOURCE_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
+  endif()
+  GET_FILENAME_COMPONENT(ARG_SOURCE_ROOT ${ARG_SOURCE_ROOT} ABSOLUTE)
+
+  if("${ARG_BINARY_ROOT}" STREQUAL "")
+    SET(ARG_BINARY_ROOT "${CMAKE_CURRENT_BINARY_DIR}")
+  endif()
+  GET_FILENAME_COMPONENT(ARG_BINARY_ROOT ${ARG_BINARY_ROOT} ABSOLUTE)
+
+  foreach(FIL ${ARG_THRIFT_FILES})
+    get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
+    get_filename_component(FIL_WE ${FIL} NAME_WE)
+
+    set(THRIFT_H_OUT "${ARG_BINARY_ROOT}/${FIL_WE}_types.h" "${ARG_BINARY_ROOT}/${FIL_WE}_constants.h")
+    set(THRIFT_CC_OUT "${ARG_BINARY_ROOT}/${FIL_WE}_constants.cpp" "${ARG_BINARY_ROOT}/${FIL_WE}_types.cpp")
+
+    execute_process(COMMAND awk "/^service/ { print $2 }" "${ABS_FIL}"
+                    OUTPUT_VARIABLE SERVICES
+                    OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+    foreach(SERVICE ${SERVICES})
+      list(APPEND THRIFT_H_OUT "${ARG_BINARY_ROOT}/${SERVICE}.h")
+      list(APPEND THRIFT_CC_OUT "${ARG_BINARY_ROOT}/${SERVICE}.cpp")
+    endforeach()
+
+    # TODO(dan): Add the fb303 files manually. This is a complete hack.
+    list(APPEND ${SRCS} "${THRIFT_CC_OUT}" "fb303_types.cpp" "fb303_constants.cpp" "FacebookService.cpp")
+    list(APPEND ${HDRS} "${THRIFT_H_OUT}" "fb303_types.h" "fb303_constants.h" "FacebookService.h")
+
+    add_custom_command(
+      OUTPUT ${THRIFT_CC_OUT} ${THRIFT_H_OUT}
+      COMMAND  ${THRIFT_EXECUTABLE}
+      ARGS
+        --gen cpp:moveable_types
+        --recurse
+        --out ${ARG_BINARY_ROOT}
+        -I ${ARG_SOURCE_ROOT}
+        # Used to find built-in .thrift files (e.g. fb303.thrift)
+        -I ${THIRDPARTY_INSTALL_CURRENT_DIR}
+        ${EXTRA_THRIFT_PATH_ARGS} ${ABS_FIL}
+      COMMENT "Running C++ thrift compiler on ${FIL}"
+      VERBATIM )
+
+    # This custom target enforces that there's just one invocation of thrift
+    # when there are multiple consumers of the generated files. The target name
+    # must be unique; adding parts of the filename helps ensure this.
+    set(TGT_NAME "${ARG_BINARY_ROOT}/${FIL}")
+    string(REPLACE "/" "-" TGT_NAME ${TGT_NAME})
+    add_custom_target(${TGT_NAME}
+      DEPENDS "${THRIFT_CC_OUT}" "${THRIFT_H_OUT}")
+    list(APPEND ${TGTS} "${TGT_NAME}")
+  endforeach()
+
+  set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
+  set(${SRCS} ${${SRCS}} PARENT_SCOPE)
+  set(${HDRS} ${${HDRS}} PARENT_SCOPE)
+  set(${TGTS} ${${TGTS}} PARENT_SCOPE)
+endfunction()
+
+find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h
+  NO_CMAKE_SYSTEM_PATH
+  NO_SYSTEM_ENVIRONMENT_PATH)
+
+find_library(THRIFT_SHARED_LIBRARY thrift
+             DOC "The Thrift Library"
+             NO_CMAKE_SYSTEM_PATH
+             NO_SYSTEM_ENVIRONMENT_PATH)
+
+find_library(THRIFT_STATIC_LIBRARY libthrift.a
+  DOC "Static version of the Thrift Library"
+  NO_CMAKE_SYSTEM_PATH
+  NO_SYSTEM_ENVIRONMENT_PATH)
+
+find_program(THRIFT_EXECUTABLE thrift
+  DOC "The Thrift Compiler"
+  NO_CMAKE_SYSTEM_PATH
+  NO_SYSTEM_ENVIRONMENT_PATH)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(THRIFT REQUIRED_VARS
+  THRIFT_SHARED_LIBRARY THRIFT_STATIC_LIBRARY
+  THRIFT_INCLUDE_DIR THRIFT_EXECUTABLE)

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/src/kudu/hms/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/hms/CMakeLists.txt b/src/kudu/hms/CMakeLists.txt
new file mode 100644
index 0000000..e9f92a5
--- /dev/null
+++ b/src/kudu/hms/CMakeLists.txt
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+THRIFT_GENERATE_CPP(
+  HMS_THRIFT_SRCS HMS_THRIFT_HDRS HMS_THRIFT_TGTS
+  THRIFT_FILES hive_metastore.thrift)
+
+add_library(hms_thrift ${HMS_THRIFT_SRCS})
+target_link_libraries(hms_thrift thrift)
+add_dependencies(hms_thrift ${HMS_THRIFT_TGTS})
+
+set(HMS_SRCS
+  hms_client.cc)
+set(HMS_DEPS
+  glog
+  hms_thrift
+  kudu_util)
+
+add_library(kudu_hms ${HMS_SRCS})
+target_link_libraries(kudu_hms ${HMS_DEPS})
+
+##############################
+# mini_hms
+##############################
+
+execute_process(COMMAND ln -nsf
+                "${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hive"
+                "${EXECUTABLE_OUTPUT_PATH}/hive-home")
+execute_process(COMMAND ln -nsf
+                "${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hadoop"
+                "${EXECUTABLE_OUTPUT_PATH}/hadoop-home")
+execute_process(COMMAND ln -nsf
+                "${JAVA_HOME}"
+                "${EXECUTABLE_OUTPUT_PATH}/java-home")
+
+file(GLOB DEPENDENCY_JARS
+  "${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hive/lib/*"
+  "${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hadoop/share/hadoop/common/*")
+
+add_jar(hms-plugin
+  "${CMAKE_SOURCE_DIR}/java/kudu-hive/src/main/java/org/apache/kudu/hive/metastore/KuduMetastorePlugin.java"
+  INCLUDE_JARS ${DEPENDENCY_JARS}
+  OUTPUT_DIR "${EXECUTABLE_OUTPUT_PATH}")
+
+set(MINI_HMS_SRCS
+  mini_hms.cc)
+
+add_library(mini_hms ${MINI_HMS_SRCS})
+target_link_libraries(mini_hms
+  gutil
+  kudu_test_util
+  kudu_util)
+add_dependencies(mini_hms hms-plugin)
+
+# Tests
+if (NOT NO_TESTS)
+  set(KUDU_TEST_LINK_LIBS
+    kudu_hms
+    mini_hms
+    ${KUDU_MIN_TEST_LIBS})
+
+  ADD_KUDU_TEST(hms_client-test)
+endif()

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/src/kudu/hms/hive_metastore.thrift
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hive_metastore.thrift b/src/kudu/hms/hive_metastore.thrift
new file mode 100644
index 0000000..448ce6c
--- /dev/null
+++ b/src/kudu/hms/hive_metastore.thrift
@@ -0,0 +1,1536 @@
+#!/usr/local/bin/thrift -java
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+# DO NOT MODIFY! Copied from
+# https://raw.githubusercontent.com/apache/hive/rel/release-2.3.0/metastore/if/hive_metastore.thrift
+# With backports:
+#   - HIVE-16993
+#
+# Before updating to a new version, consider that Kudu must remain compatible
+# with a range of Hive Metastore versions.
+
+#
+# Thrift Service that the MetaStore is built on
+#
+
+include "share/fb303/if/fb303.thrift"
+
+namespace java org.apache.hadoop.hive.metastore.api
+namespace php metastore
+namespace cpp Apache.Hadoop.Hive
+
+const string DDL_TIME = "transient_lastDdlTime"
+
+struct Version {
+  1: string version,
+  2: string comments
+}
+
+struct FieldSchema {
+  1: string name, // name of the field
+  2: string type, // type of the field. primitive types defined above, specify list<TYPE_NAME>, map<TYPE_NAME, TYPE_NAME> for lists & maps
+  3: string comment
+}
+
+struct SQLPrimaryKey {
+  1: string table_db,    // table schema
+  2: string table_name,  // table name
+  3: string column_name, // column name
+  4: i32 key_seq,        // sequence number within primary key
+  5: string pk_name,     // primary key name
+  6: bool enable_cstr,   // Enable/Disable
+  7: bool validate_cstr,  // Validate/No validate
+  8: bool rely_cstr      // Rely/No Rely
+}
+
+struct SQLForeignKey {
+  1: string pktable_db,    // primary key table schema
+  2: string pktable_name,  // primary key table name
+  3: string pkcolumn_name, // primary key column name
+  4: string fktable_db,    // foreign key table schema
+  5: string fktable_name,  // foreign key table name
+  6: string fkcolumn_name, // foreign key column name
+  7: i32 key_seq,          // sequence within foreign key
+  8: i32 update_rule,      // what happens to foreign key when parent key is updated
+  9: i32 delete_rule,      // what happens to foreign key when parent key is deleted
+  10: string fk_name,      // foreign key name
+  11: string pk_name,      // primary key name
+  12: bool enable_cstr,    // Enable/Disable
+  13: bool validate_cstr,  // Validate/No validate
+  14: bool rely_cstr       // Rely/No Rely
+}
+
+struct Type {
+  1: string          name,             // one of the types in PrimitiveTypes or CollectionTypes or User defined types
+  2: optional string type1,            // object type if the name is 'list' (LIST_TYPE), key type if the name is 'map' (MAP_TYPE)
+  3: optional string type2,            // val type if the name is 'map' (MAP_TYPE)
+  4: optional list<FieldSchema> fields // if the name is one of the user defined types
+}
+
+enum HiveObjectType {
+  GLOBAL = 1,
+  DATABASE = 2,
+  TABLE = 3,
+  PARTITION = 4,
+  COLUMN = 5,
+}
+
+enum PrincipalType {
+  USER = 1,
+  ROLE = 2,
+  GROUP = 3,
+}
+
+const string HIVE_FILTER_FIELD_OWNER = "hive_filter_field_owner__"
+const string HIVE_FILTER_FIELD_PARAMS = "hive_filter_field_params__"
+const string HIVE_FILTER_FIELD_LAST_ACCESS = "hive_filter_field_last_access__"
+
+enum PartitionEventType {
+  LOAD_DONE = 1,
+}
+
+// Enums for transaction and lock management 
+enum TxnState {
+    COMMITTED = 1,
+    ABORTED = 2,
+    OPEN = 3,
+}
+
+enum LockLevel {
+    DB = 1,
+    TABLE = 2,
+    PARTITION = 3,
+}
+
+enum LockState {
+    ACQUIRED = 1,       // requester has the lock
+    WAITING = 2,        // requester is waiting for the lock and should call checklock at a later point to see if the lock has been obtained.
+    ABORT = 3,          // the lock has been aborted, most likely due to timeout
+    NOT_ACQUIRED = 4,   // returned only with lockNoWait, indicates the lock was not available and was not acquired
+}
+
+enum LockType {
+    SHARED_READ = 1,
+    SHARED_WRITE = 2,
+    EXCLUSIVE = 3,
+}
+
+enum CompactionType {
+    MINOR = 1,
+    MAJOR = 2,
+}
+
+enum GrantRevokeType {
+    GRANT = 1,
+    REVOKE = 2,
+}
+
+enum DataOperationType {
+    SELECT = 1,
+    INSERT = 2
+    UPDATE = 3,
+    DELETE = 4,
+    UNSET = 5,//this is the default to distinguish from NULL from old clients
+    NO_TXN = 6,//drop table, insert overwrite, etc - something non-transactional
+}
+
+// Types of events the client can request that the metastore fire.  For now just support DML operations, as the metastore knows
+// about DDL operations and there's no reason for the client to request such an event.
+enum EventRequestType {
+    INSERT = 1,
+    UPDATE = 2,
+    DELETE = 3,
+}
+
+struct HiveObjectRef{
+  1: HiveObjectType objectType,
+  2: string dbName,
+  3: string objectName,
+  4: list<string> partValues,
+  5: string columnName,
+}
+
+struct PrivilegeGrantInfo {
+  1: string privilege,
+  2: i32 createTime,
+  3: string grantor,
+  4: PrincipalType grantorType,
+  5: bool grantOption,
+}
+
+struct HiveObjectPrivilege {
+  1: HiveObjectRef  hiveObject,
+  2: string principalName,
+  3: PrincipalType principalType,
+  4: PrivilegeGrantInfo grantInfo,
+}
+
+struct PrivilegeBag {
+  1: list<HiveObjectPrivilege> privileges,
+}
+
+struct PrincipalPrivilegeSet {
+  1: map<string, list<PrivilegeGrantInfo>> userPrivileges, // user name -> privilege grant info
+  2: map<string, list<PrivilegeGrantInfo>> groupPrivileges, // group name -> privilege grant info
+  3: map<string, list<PrivilegeGrantInfo>> rolePrivileges, //role name -> privilege grant info
+}
+
+struct GrantRevokePrivilegeRequest {
+  1: GrantRevokeType requestType;
+  2: PrivilegeBag privileges;
+  3: optional bool revokeGrantOption;  // Only for revoke request
+}
+
+struct GrantRevokePrivilegeResponse {
+  1: optional bool success;
+}
+
+struct Role {
+  1: string roleName,
+  2: i32 createTime,
+  3: string ownerName,
+}
+
+// Representation of a grant for a principal to a role
+struct RolePrincipalGrant {
+  1: string roleName,
+  2: string principalName,
+  3: PrincipalType principalType,
+  4: bool grantOption,
+  5: i32 grantTime,
+  6: string grantorName,
+  7: PrincipalType grantorPrincipalType
+}
+
+struct GetRoleGrantsForPrincipalRequest {
+  1: required string principal_name,
+  2: required PrincipalType principal_type
+}
+
+struct GetRoleGrantsForPrincipalResponse {
+  1: required list<RolePrincipalGrant> principalGrants;
+}
+
+struct GetPrincipalsInRoleRequest {
+  1: required string roleName;
+}
+
+struct GetPrincipalsInRoleResponse {
+  1: required list<RolePrincipalGrant> principalGrants;
+}
+
+struct GrantRevokeRoleRequest {
+  1: GrantRevokeType requestType;
+  2: string roleName;
+  3: string principalName;
+  4: PrincipalType principalType;
+  5: optional string grantor;            // Needed for grant
+  6: optional PrincipalType grantorType; // Needed for grant
+  7: optional bool grantOption;
+}
+
+struct GrantRevokeRoleResponse {
+  1: optional bool success;
+}
+
+// namespace for tables
+struct Database {
+  1: string name,
+  2: string description,
+  3: optional string locationUri,
+  4: map<string, string> parameters, // properties associated with the database
+  5: optional PrincipalPrivilegeSet privileges,
+  6: optional string ownerName,
+  7: optional PrincipalType ownerType
+}
+
+// This object holds the information needed by SerDes
+struct SerDeInfo {
+  1: string name,                   // name of the serde, table name by default
+  2: string serializationLib,       // usually the class that implements the extractor & loader
+  3: map<string, string> parameters // initialization parameters
+}
+
+// sort order of a column (column name along with asc(1)/desc(0))
+struct Order {
+  1: string col,      // sort column name
+  2: i32    order     // asc(1) or desc(0)
+}
+
+// this object holds all the information about skewed table
+struct SkewedInfo {
+  1: list<string> skewedColNames, // skewed column names
+  2: list<list<string>> skewedColValues, //skewed values
+  3: map<list<string>, string> skewedColValueLocationMaps, //skewed value to location mappings
+}
+
+// this object holds all the information about physical storage of the data belonging to a table
+struct StorageDescriptor {
+  1: list<FieldSchema> cols,  // required (refer to types defined above)
+  2: string location,         // defaults to <warehouse loc>/<db loc>/tablename
+  3: string inputFormat,      // SequenceFileInputFormat (binary) or TextInputFormat`  or custom format
+  4: string outputFormat,     // SequenceFileOutputFormat (binary) or IgnoreKeyTextOutputFormat or custom format
+  5: bool   compressed,       // compressed or not
+  6: i32    numBuckets,       // this must be specified if there are any dimension columns
+  7: SerDeInfo    serdeInfo,  // serialization and deserialization information
+  8: list<string> bucketCols, // reducer grouping columns and clustering columns and bucketing columns`
+  9: list<Order>  sortCols,   // sort order of the data in each bucket
+  10: map<string, string> parameters, // any user supplied key value hash
+  11: optional SkewedInfo skewedInfo, // skewed information
+  12: optional bool   storedAsSubDirectories       // stored as subdirectories or not
+}
+
+// table information
+struct Table {
+  1: string tableName,                // name of the table
+  2: string dbName,                   // database name ('default')
+  3: string owner,                    // owner of this table
+  4: i32    createTime,               // creation time of the table
+  5: i32    lastAccessTime,           // last access time (usually this will be filled from HDFS and shouldn't be relied on)
+  6: i32    retention,                // retention time
+  7: StorageDescriptor sd,            // storage descriptor of the table
+  8: list<FieldSchema> partitionKeys, // partition keys of the table. only primitive types are supported
+  9: map<string, string> parameters,   // to store comments or any other user level parameters
+  10: string viewOriginalText,         // original view text, null for non-view
+  11: string viewExpandedText,         // expanded view text, null for non-view
+  12: string tableType,                // table type enum, e.g. EXTERNAL_TABLE
+  13: optional PrincipalPrivilegeSet privileges,
+  14: optional bool temporary=false,
+  15: optional bool rewriteEnabled     // rewrite enabled or not
+}
+
+struct Partition {
+  1: list<string> values // string value is converted to appropriate partition key type
+  2: string       dbName,
+  3: string       tableName,
+  4: i32          createTime,
+  5: i32          lastAccessTime,
+  6: StorageDescriptor   sd,
+  7: map<string, string> parameters,
+  8: optional PrincipalPrivilegeSet privileges
+}
+
+struct PartitionWithoutSD {
+  1: list<string> values // string value is converted to appropriate partition key type
+  2: i32          createTime,
+  3: i32          lastAccessTime,
+  4: string       relativePath,
+  5: map<string, string> parameters,
+  6: optional PrincipalPrivilegeSet privileges
+}
+
+struct PartitionSpecWithSharedSD {
+  1: list<PartitionWithoutSD> partitions,
+  2: StorageDescriptor sd,
+}
+
+struct PartitionListComposingSpec {
+  1: list<Partition> partitions
+}
+
+struct PartitionSpec {
+  1: string dbName,
+  2: string tableName,
+  3: string rootPath,
+  4: optional PartitionSpecWithSharedSD sharedSDPartitionSpec,
+  5: optional PartitionListComposingSpec partitionList
+}
+
+struct Index {
+  1: string       indexName, // unique with in the whole database namespace
+  2: string       indexHandlerClass, // reserved
+  3: string       dbName,
+  4: string       origTableName,
+  5: i32          createTime,
+  6: i32          lastAccessTime,
+  7: string       indexTableName,
+  8: StorageDescriptor   sd,
+  9: map<string, string> parameters,
+  10: bool         deferredRebuild
+}
+
+// column statistics
+struct BooleanColumnStatsData {
+1: required i64 numTrues,
+2: required i64 numFalses,
+3: required i64 numNulls,
+4: optional string bitVectors
+}
+
+struct DoubleColumnStatsData {
+1: optional double lowValue,
+2: optional double highValue,
+3: required i64 numNulls,
+4: required i64 numDVs,
+5: optional string bitVectors
+}
+
+struct LongColumnStatsData {
+1: optional i64 lowValue,
+2: optional i64 highValue,
+3: required i64 numNulls,
+4: required i64 numDVs,
+5: optional string bitVectors
+}
+
+struct StringColumnStatsData {
+1: required i64 maxColLen,
+2: required double avgColLen,
+3: required i64 numNulls,
+4: required i64 numDVs,
+5: optional string bitVectors
+}
+
+struct BinaryColumnStatsData {
+1: required i64 maxColLen,
+2: required double avgColLen,
+3: required i64 numNulls,
+4: optional string bitVectors
+}
+
+
+struct Decimal {
+1: required binary unscaled,
+3: required i16 scale
+}
+
+struct DecimalColumnStatsData {
+1: optional Decimal lowValue,
+2: optional Decimal highValue,
+3: required i64 numNulls,
+4: required i64 numDVs,
+5: optional string bitVectors
+}
+
+struct Date {
+1: required i64 daysSinceEpoch
+}
+
+struct DateColumnStatsData {
+1: optional Date lowValue,
+2: optional Date highValue,
+3: required i64 numNulls,
+4: required i64 numDVs,
+5: optional string bitVectors
+}
+
+union ColumnStatisticsData {
+1: BooleanColumnStatsData booleanStats,
+2: LongColumnStatsData longStats,
+3: DoubleColumnStatsData doubleStats,
+4: StringColumnStatsData stringStats,
+5: BinaryColumnStatsData binaryStats,
+6: DecimalColumnStatsData decimalStats,
+7: DateColumnStatsData dateStats
+}
+
+struct ColumnStatisticsObj {
+1: required string colName,
+2: required string colType,
+3: required ColumnStatisticsData statsData
+}
+
+struct ColumnStatisticsDesc {
+1: required bool isTblLevel,
+2: required string dbName,
+3: required string tableName,
+4: optional string partName,
+5: optional i64 lastAnalyzed
+}
+
+struct ColumnStatistics {
+1: required ColumnStatisticsDesc statsDesc,
+2: required list<ColumnStatisticsObj> statsObj;
+}
+
+struct AggrStats {
+1: required list<ColumnStatisticsObj> colStats,
+2: required i64 partsFound // number of partitions for which stats were found
+}
+
+struct SetPartitionsStatsRequest {
+1: required list<ColumnStatistics> colStats,
+2: optional bool needMerge //stats need to be merged with the existing stats
+}
+
+// schema of the table/query results etc.
+struct Schema {
+ // column names, types, comments
+ 1: list<FieldSchema> fieldSchemas,  // delimiters etc
+ 2: map<string, string> properties
+}
+
+// Key-value store to be used with selected
+// Metastore APIs (create, alter methods).
+// The client can pass environment properties / configs that can be
+// accessed in hooks.
+struct EnvironmentContext {
+  1: map<string, string> properties
+}
+
+struct PrimaryKeysRequest {
+  1: required string db_name,
+  2: required string tbl_name
+}
+
+struct PrimaryKeysResponse {
+  1: required list<SQLPrimaryKey> primaryKeys
+}
+
+struct ForeignKeysRequest {
+  1: string parent_db_name,
+  2: string parent_tbl_name,
+  3: string foreign_db_name,
+  4: string foreign_tbl_name
+}
+
+struct ForeignKeysResponse {
+  1: required list<SQLForeignKey> foreignKeys
+}
+
+struct DropConstraintRequest {
+  1: required string dbname, 
+  2: required string tablename,
+  3: required string constraintname
+}
+
+struct AddPrimaryKeyRequest {
+  1: required list<SQLPrimaryKey> primaryKeyCols
+}
+
+struct AddForeignKeyRequest {
+  1: required list<SQLForeignKey> foreignKeyCols
+}
+
+// Return type for get_partitions_by_expr
+struct PartitionsByExprResult {
+  1: required list<Partition> partitions,
+  // Whether the results has any (currently, all) partitions which may or may not match
+  2: required bool hasUnknownPartitions
+}
+
+struct PartitionsByExprRequest {
+  1: required string dbName,
+  2: required string tblName,
+  3: required binary expr,
+  4: optional string defaultPartitionName,
+  5: optional i16 maxParts=-1
+}
+
+struct TableStatsResult {
+  1: required list<ColumnStatisticsObj> tableStats
+}
+
+struct PartitionsStatsResult {
+  1: required map<string, list<ColumnStatisticsObj>> partStats
+}
+
+struct TableStatsRequest {
+ 1: required string dbName,
+ 2: required string tblName,
+ 3: required list<string> colNames
+}
+
+struct PartitionsStatsRequest {
+ 1: required string dbName,
+ 2: required string tblName,
+ 3: required list<string> colNames,
+ 4: required list<string> partNames
+}
+
+// Return type for add_partitions_req
+struct AddPartitionsResult {
+  1: optional list<Partition> partitions,
+}
+
+// Request type for add_partitions_req
+struct AddPartitionsRequest {
+  1: required string dbName,
+  2: required string tblName,
+  3: required list<Partition> parts,
+  4: required bool ifNotExists,
+  5: optional bool needResult=true
+}
+
+// Return type for drop_partitions_req
+struct DropPartitionsResult {
+  1: optional list<Partition> partitions,
+}
+
+struct DropPartitionsExpr {
+  1: required binary expr;
+  2: optional i32 partArchiveLevel;
+}
+
+union RequestPartsSpec {
+  1: list<string> names;
+  2: list<DropPartitionsExpr> exprs;
+}
+
+// Request type for drop_partitions_req
+// TODO: we might want to add "bestEffort" flag; where a subset can fail
+struct DropPartitionsRequest {
+  1: required string dbName,
+  2: required string tblName,
+  3: required RequestPartsSpec parts,
+  4: optional bool deleteData,
+  5: optional bool ifExists=true, // currently verified on client
+  6: optional bool ignoreProtection,
+  7: optional EnvironmentContext environmentContext,
+  8: optional bool needResult=true
+}
+
+enum FunctionType {
+  JAVA = 1,
+}
+
+enum ResourceType {
+  JAR     = 1,
+  FILE    = 2,
+  ARCHIVE = 3,
+}
+
+struct ResourceUri {
+  1: ResourceType resourceType,
+  2: string       uri,
+}
+
+// User-defined function
+struct Function {
+  1: string           functionName,
+  2: string           dbName,
+  3: string           className,
+  4: string           ownerName,
+  5: PrincipalType    ownerType,
+  6: i32              createTime,
+  7: FunctionType     functionType,
+  8: list<ResourceUri> resourceUris,
+}
+
+// Structs for transaction and locks
+struct TxnInfo {
+    1: required i64 id,
+    2: required TxnState state,
+    3: required string user,        // used in 'show transactions' to help admins find who has open transactions
+    4: required string hostname,    // used in 'show transactions' to help admins find who has open transactions
+    5: optional string agentInfo = "Unknown",
+    6: optional i32 heartbeatCount=0,
+    7: optional string metaInfo,
+    8: optional i64 startedTime,
+    9: optional i64 lastHeartbeatTime,
+}
+
+struct GetOpenTxnsInfoResponse {
+    1: required i64 txn_high_water_mark,
+    2: required list<TxnInfo> open_txns,
+}
+
+struct GetOpenTxnsResponse {
+    1: required i64 txn_high_water_mark,
+    2: required set<i64> open_txns,
+    3: optional i64 min_open_txn, //since 1.3,2.2
+}
+
+struct OpenTxnRequest {
+    1: required i32 num_txns,
+    2: required string user,
+    3: required string hostname,
+    4: optional string agentInfo = "Unknown",
+}
+
+struct OpenTxnsResponse {
+    1: required list<i64> txn_ids,
+}
+
+struct AbortTxnRequest {
+    1: required i64 txnid,
+}
+
+struct AbortTxnsRequest {
+    1: required list<i64> txn_ids,
+}
+
+struct CommitTxnRequest {
+    1: required i64 txnid,
+}
+
+struct LockComponent {
+    1: required LockType type,
+    2: required LockLevel level,
+    3: required string dbname,
+    4: optional string tablename,
+    5: optional string partitionname,
+    6: optional DataOperationType operationType = DataOperationType.UNSET,
+    7: optional bool isAcid = false,
+    8: optional bool isDynamicPartitionWrite = false
+}
+
+struct LockRequest {
+    1: required list<LockComponent> component,
+    2: optional i64 txnid,
+    3: required string user,     // used in 'show locks' to help admins find who has open locks
+    4: required string hostname, // used in 'show locks' to help admins find who has open locks
+    5: optional string agentInfo = "Unknown",
+}
+
+struct LockResponse {
+    1: required i64 lockid,
+    2: required LockState state,
+}
+
+struct CheckLockRequest {
+    1: required i64 lockid,
+    2: optional i64 txnid,
+    3: optional i64 elapsed_ms,
+}
+
+struct UnlockRequest {
+    1: required i64 lockid,
+}
+
+struct ShowLocksRequest {
+    1: optional string dbname,
+    2: optional string tablename,
+    3: optional string partname,
+    4: optional bool isExtended=false,
+}
+
+struct ShowLocksResponseElement {
+    1: required i64 lockid,
+    2: required string dbname,
+    3: optional string tablename,
+    4: optional string partname,
+    5: required LockState state,
+    6: required LockType type,
+    7: optional i64 txnid,
+    8: required i64 lastheartbeat,
+    9: optional i64 acquiredat,
+    10: required string user,
+    11: required string hostname,
+    12: optional i32 heartbeatCount = 0,
+    13: optional string agentInfo,
+    14: optional i64 blockedByExtId,
+    15: optional i64 blockedByIntId,
+    16: optional i64 lockIdInternal,
+}
+
+struct ShowLocksResponse {
+    1: list<ShowLocksResponseElement> locks,
+}
+
+struct HeartbeatRequest {
+    1: optional i64 lockid,
+    2: optional i64 txnid
+}
+
+struct HeartbeatTxnRangeRequest {
+    1: required i64 min,
+    2: required i64 max
+}
+
+struct HeartbeatTxnRangeResponse {
+    1: required set<i64> aborted,
+    2: required set<i64> nosuch
+}
+
+struct CompactionRequest {
+    1: required string dbname,
+    2: required string tablename,
+    3: optional string partitionname,
+    4: required CompactionType type,
+    5: optional string runas,
+    6: optional map<string, string> properties
+}
+
+struct CompactionResponse {
+    1: required i64 id,
+    2: required string state,
+    3: required bool accepted
+}
+
+struct ShowCompactRequest {
+}
+
+struct ShowCompactResponseElement {
+    1: required string dbname,
+    2: required string tablename,
+    3: optional string partitionname,
+    4: required CompactionType type,
+    5: required string state,
+    6: optional string workerid,
+    7: optional i64 start,
+    8: optional string runAs,
+    9: optional i64 hightestTxnId, // Highest Txn ID handled by this compaction
+    10: optional string metaInfo,
+    11: optional i64 endTime,
+    12: optional string hadoopJobId = "None",
+    13: optional i64 id,
+}
+
+struct ShowCompactResponse {
+    1: required list<ShowCompactResponseElement> compacts,
+}
+
+struct AddDynamicPartitions {
+    1: required i64 txnid,
+    2: required string dbname,
+    3: required string tablename,
+    4: required list<string> partitionnames,
+    5: optional DataOperationType operationType = DataOperationType.UNSET
+}
+
+struct NotificationEventRequest {
+    1: required i64 lastEvent,
+    2: optional i32 maxEvents,
+}
+
+struct NotificationEvent {
+    1: required i64 eventId,
+    2: required i32 eventTime,
+    3: required string eventType,
+    4: optional string dbName,
+    5: optional string tableName,
+    6: required string message,
+    7: optional string messageFormat,
+}
+
+struct NotificationEventResponse {
+    1: required list<NotificationEvent> events,
+}
+
+struct CurrentNotificationEventId {
+    1: required i64 eventId,
+}
+
+struct InsertEventRequestData {
+    1: required list<string> filesAdded,
+    // Checksum of files (hex string of checksum byte payload)
+    2: optional list<string> filesAddedChecksum,
+}
+
+union FireEventRequestData {
+    1: InsertEventRequestData insertData
+}
+
+struct FireEventRequest {
+    1: required bool successful,
+    2: required FireEventRequestData data
+    // dbname, tablename, and partition vals are included as optional in the top level event rather than placed in each type of
+    // subevent as I assume they'll be used across most event types.
+    3: optional string dbName,
+    4: optional string tableName,
+    5: optional list<string> partitionVals,
+}
+
+struct FireEventResponse {
+    // NOP for now, this is just a place holder for future responses
+}
+    
+struct MetadataPpdResult {
+  1: optional binary metadata,
+  2: optional binary includeBitset
+}
+
+// Return type for get_file_metadata_by_expr
+struct GetFileMetadataByExprResult {
+  1: required map<i64, MetadataPpdResult> metadata,
+  2: required bool isSupported
+}
+
+enum FileMetadataExprType {
+  ORC_SARG = 1
+}
+
+
+// Request type for get_file_metadata_by_expr
+struct GetFileMetadataByExprRequest {
+  1: required list<i64> fileIds,
+  2: required binary expr,
+  3: optional bool doGetFooters,
+  4: optional FileMetadataExprType type
+}
+
+// Return type for get_file_metadata
+struct GetFileMetadataResult {
+  1: required map<i64, binary> metadata,
+  2: required bool isSupported
+}
+
+// Request type for get_file_metadata
+struct GetFileMetadataRequest {
+  1: required list<i64> fileIds
+}
+
+// Return type for put_file_metadata
+struct PutFileMetadataResult {
+}
+
+// Request type for put_file_metadata
+struct PutFileMetadataRequest {
+  1: required list<i64> fileIds,
+  2: required list<binary> metadata,
+  3: optional FileMetadataExprType type
+}
+
+// Return type for clear_file_metadata
+struct ClearFileMetadataResult {
+}
+
+// Request type for clear_file_metadata
+struct ClearFileMetadataRequest {
+  1: required list<i64> fileIds
+}
+
+// Return type for cache_file_metadata
+struct CacheFileMetadataResult {
+  1: required bool isSupported
+}
+
+// Request type for cache_file_metadata
+struct CacheFileMetadataRequest {
+  1: required string dbName,
+  2: required string tblName,
+  3: optional string partName,
+  4: optional bool isAllParts
+}
+
+struct GetAllFunctionsResponse {
+  1: optional list<Function> functions
+}
+
+enum ClientCapability {
+  TEST_CAPABILITY = 1
+}
+
+
+struct ClientCapabilities {
+  1: required list<ClientCapability> values
+}
+
+struct GetTableRequest {
+  1: required string dbName,
+  2: required string tblName,
+  3: optional ClientCapabilities capabilities
+}
+
+struct GetTableResult {
+  1: required Table table
+}
+
+struct GetTablesRequest {
+  1: required string dbName,
+  2: optional list<string> tblNames,
+  3: optional ClientCapabilities capabilities
+}
+
+struct GetTablesResult {
+  1: required list<Table> tables
+}
+
+struct TableMeta {
+  1: required string dbName;
+  2: required string tableName;
+  3: required string tableType;
+  4: optional string comments;
+}
+
+exception MetaException {
+  1: string message
+}
+
+exception UnknownTableException {
+  1: string message
+}
+
+exception UnknownDBException {
+  1: string message
+}
+
+exception AlreadyExistsException {
+  1: string message
+}
+
+exception InvalidPartitionException {
+  1: string message
+}
+
+exception UnknownPartitionException {
+  1: string message
+}
+
+exception InvalidObjectException {
+  1: string message
+}
+
+exception NoSuchObjectException {
+  1: string message
+}
+
+exception IndexAlreadyExistsException {
+  1: string message
+}
+
+exception InvalidOperationException {
+  1: string message
+}
+
+exception ConfigValSecurityException {
+  1: string message
+}
+
+exception InvalidInputException {
+  1: string message
+}
+
+// Transaction and lock exceptions
+exception NoSuchTxnException {
+    1: string message
+}
+
+exception TxnAbortedException {
+    1: string message
+}
+
+exception TxnOpenException {
+    1: string message
+}
+
+exception NoSuchLockException {
+    1: string message
+}
+
+/**
+* This interface is live.
+*/
+service ThriftHiveMetastore extends fb303.FacebookService
+{
+  string getMetaConf(1:string key) throws(1:MetaException o1)
+  void setMetaConf(1:string key, 2:string value) throws(1:MetaException o1)
+
+  void create_database(1:Database database) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3)
+  Database get_database(1:string name) throws(1:NoSuchObjectException o1, 2:MetaException o2)
+  void drop_database(1:string name, 2:bool deleteData, 3:bool cascade) throws(1:NoSuchObjectException o1, 2:InvalidOperationException o2, 3:MetaException o3)
+  list<string> get_databases(1:string pattern) throws(1:MetaException o1)
+  list<string> get_all_databases() throws(1:MetaException o1)
+  void alter_database(1:string dbname, 2:Database db) throws(1:MetaException o1, 2:NoSuchObjectException o2)
+
+  // returns the type with the given name (make separate calls for the dependent types if needed)
+  Type get_type(1:string name)  throws(1:MetaException o1, 2:NoSuchObjectException o2)
+  bool create_type(1:Type type) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3)
+  bool drop_type(1:string type) throws(1:MetaException o1, 2:NoSuchObjectException o2)
+  map<string, Type> get_type_all(1:string name)
+                                throws(1:MetaException o2)
+
+  // Gets a list of FieldSchemas describing the columns of a particular table
+  list<FieldSchema> get_fields(1: string db_name, 2: string table_name) throws (1: MetaException o1, 2: UnknownTableException o2, 3: UnknownDBException o3),
+  list<FieldSchema> get_fields_with_environment_context(1: string db_name, 2: string table_name, 3:EnvironmentContext environment_context) throws (1: MetaException o1, 2: UnknownTableException o2, 3: UnknownDBException o3)
+
+  // Gets a list of FieldSchemas describing both the columns and the partition keys of a particular table
+  list<FieldSchema> get_schema(1: string db_name, 2: string table_name) throws (1: MetaException o1, 2: UnknownTableException o2, 3: UnknownDBException o3)
+  list<FieldSchema> get_schema_with_environment_context(1: string db_name, 2: string table_name, 3:EnvironmentContext environment_context) throws (1: MetaException o1, 2: UnknownTableException o2, 3: UnknownDBException o3)
+
+  // create a Hive table. Following fields must be set
+  // tableName
+  // database        (only 'default' for now until Hive QL supports databases)
+  // owner           (not needed, but good to have for tracking purposes)
+  // sd.cols         (list of field schemas)
+  // sd.inputFormat  (SequenceFileInputFormat (binary like falcon tables or u_full) or TextInputFormat)
+  // sd.outputFormat (SequenceFileInputFormat (binary) or TextInputFormat)
+  // sd.serdeInfo.serializationLib (SerDe class name, e.g. org.apache.hadoop.hive.serde.simple_meta.MetadataTypedColumnsetSerDe)
+  // * See notes on DDL_TIME
+  void create_table(1:Table tbl) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3, 4:NoSuchObjectException o4)
+  void create_table_with_environment_context(1:Table tbl,
+      2:EnvironmentContext environment_context)
+      throws (1:AlreadyExistsException o1,
+              2:InvalidObjectException o2, 3:MetaException o3,
+              4:NoSuchObjectException o4)
+  void create_table_with_constraints(1:Table tbl, 2: list<SQLPrimaryKey> primaryKeys, 3: list<SQLForeignKey> foreignKeys)
+      throws (1:AlreadyExistsException o1,
+              2:InvalidObjectException o2, 3:MetaException o3,
+              4:NoSuchObjectException o4)
+  void drop_constraint(1:DropConstraintRequest req)
+      throws(1:NoSuchObjectException o1, 2:MetaException o3)
+  void add_primary_key(1:AddPrimaryKeyRequest req)
+      throws(1:NoSuchObjectException o1, 2:MetaException o2)
+  void add_foreign_key(1:AddForeignKeyRequest req)
+      throws(1:NoSuchObjectException o1, 2:MetaException o2)  
+
+  // drops the table and all the partitions associated with it if the table has partitions
+  // delete data (including partitions) if deleteData is set to true
+  void drop_table(1:string dbname, 2:string name, 3:bool deleteData)
+                       throws(1:NoSuchObjectException o1, 2:MetaException o3)
+  void drop_table_with_environment_context(1:string dbname, 2:string name, 3:bool deleteData,
+      4:EnvironmentContext environment_context)
+                       throws(1:NoSuchObjectException o1, 2:MetaException o3)
+  list<string> get_tables(1: string db_name, 2: string pattern) throws (1: MetaException o1)
+  list<string> get_tables_by_type(1: string db_name, 2: string pattern, 3: string tableType) throws (1: MetaException o1)
+  list<TableMeta> get_table_meta(1: string db_patterns, 2: string tbl_patterns, 3: list<string> tbl_types)
+                       throws (1: MetaException o1)
+  list<string> get_all_tables(1: string db_name) throws (1: MetaException o1)
+
+  Table get_table(1:string dbname, 2:string tbl_name)
+                       throws (1:MetaException o1, 2:NoSuchObjectException o2)
+  list<Table> get_table_objects_by_name(1:string dbname, 2:list<string> tbl_names)
+  GetTableResult get_table_req(1:GetTableRequest req)
+                       throws (1:MetaException o1, 2:NoSuchObjectException o2)
+  GetTablesResult get_table_objects_by_name_req(1:GetTablesRequest req)
+                       throws (1:MetaException o1, 2:InvalidOperationException o2, 3:UnknownDBException o3)
+
+  // Get a list of table names that match a filter.
+  // The filter operators are LIKE, <, <=, >, >=, =, <>
+  //
+  // In the filter statement, values interpreted as strings must be enclosed in quotes,
+  // while values interpreted as integers should not be.  Strings and integers are the only
+  // supported value types.
+  //
+  // The currently supported key names in the filter are:
+  // Constants.HIVE_FILTER_FIELD_OWNER, which filters on the tables' owner's name
+  //   and supports all filter operators
+  // Constants.HIVE_FILTER_FIELD_LAST_ACCESS, which filters on the last access times
+  //   and supports all filter operators except LIKE
+  // Constants.HIVE_FILTER_FIELD_PARAMS, which filters on the tables' parameter keys and values
+  //   and only supports the filter operators = and <>.
+  //   Append the parameter key name to HIVE_FILTER_FIELD_PARAMS in the filter statement.
+  //   For example, to filter on parameter keys called "retention", the key name in the filter
+  //   statement should be Constants.HIVE_FILTER_FIELD_PARAMS + "retention"
+  //   Also, = and <> only work for keys that exist
+  //   in the tables. E.g., if you are looking for tables where key1 <> value, it will only
+  //   look at tables that have a value for the parameter key1.
+  // Some example filter statements include:
+  // filter = Constants.HIVE_FILTER_FIELD_OWNER + " like \".*test.*\" and " +
+  //   Constants.HIVE_FILTER_FIELD_LAST_ACCESS + " = 0";
+  // filter = Constants.HIVE_FILTER_FIELD_PARAMS + "retention = \"30\" or " +
+  //   Constants.HIVE_FILTER_FIELD_PARAMS + "retention = \"90\""
+  // @param dbName
+  //          The name of the database from which you will retrieve the table names
+  // @param filterType
+  //          The type of filter
+  // @param filter
+  //          The filter string
+  // @param max_tables
+  //          The maximum number of tables returned
+  // @return  A list of table names that match the desired filter
+  list<string> get_table_names_by_filter(1:string dbname, 2:string filter, 3:i16 max_tables=-1)
+                       throws (1:MetaException o1, 2:InvalidOperationException o2, 3:UnknownDBException o3)
+
+  // alter table applies to only future partitions not for existing partitions
+  // * See notes on DDL_TIME
+  void alter_table(1:string dbname, 2:string tbl_name, 3:Table new_tbl)
+                       throws (1:InvalidOperationException o1, 2:MetaException o2)
+  void alter_table_with_environment_context(1:string dbname, 2:string tbl_name,
+      3:Table new_tbl, 4:EnvironmentContext environment_context)
+      throws (1:InvalidOperationException o1, 2:MetaException o2)
+  // alter table not only applies to future partitions but also cascade to existing partitions
+  void alter_table_with_cascade(1:string dbname, 2:string tbl_name, 3:Table new_tbl, 4:bool cascade)
+                       throws (1:InvalidOperationException o1, 2:MetaException o2)
+  // the following applies to only tables that have partitions
+  // * See notes on DDL_TIME
+  Partition add_partition(1:Partition new_part)
+                       throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
+  Partition add_partition_with_environment_context(1:Partition new_part,
+      2:EnvironmentContext environment_context)
+      throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2,
+      3:MetaException o3)
+  i32 add_partitions(1:list<Partition> new_parts)
+                       throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
+  i32 add_partitions_pspec(1:list<PartitionSpec> new_parts)
+                       throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
+  Partition append_partition(1:string db_name, 2:string tbl_name, 3:list<string> part_vals)
+                       throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
+  AddPartitionsResult add_partitions_req(1:AddPartitionsRequest request)
+                       throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
+  Partition append_partition_with_environment_context(1:string db_name, 2:string tbl_name,
+      3:list<string> part_vals, 4:EnvironmentContext environment_context)
+                       throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
+  Partition append_partition_by_name(1:string db_name, 2:string tbl_name, 3:string part_name)
+                       throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
+  Partition append_partition_by_name_with_environment_context(1:string db_name, 2:string tbl_name,
+      3:string part_name, 4:EnvironmentContext environment_context)
+                       throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
+  bool drop_partition(1:string db_name, 2:string tbl_name, 3:list<string> part_vals, 4:bool deleteData)
+                       throws(1:NoSuchObjectException o1, 2:MetaException o2)
+  bool drop_partition_with_environment_context(1:string db_name, 2:string tbl_name,
+      3:list<string> part_vals, 4:bool deleteData, 5:EnvironmentContext environment_context)
+                       throws(1:NoSuchObjectException o1, 2:MetaException o2)
+  bool drop_partition_by_name(1:string db_name, 2:string tbl_name, 3:string part_name, 4:bool deleteData)
+                       throws(1:NoSuchObjectException o1, 2:MetaException o2)
+  bool drop_partition_by_name_with_environment_context(1:string db_name, 2:string tbl_name,
+      3:string part_name, 4:bool deleteData, 5:EnvironmentContext environment_context)
+                       throws(1:NoSuchObjectException o1, 2:MetaException o2)
+  DropPartitionsResult drop_partitions_req(1: DropPartitionsRequest req)
+                       throws(1:NoSuchObjectException o1, 2:MetaException o2)
+
+  Partition get_partition(1:string db_name, 2:string tbl_name, 3:list<string> part_vals)
+                       throws(1:MetaException o1, 2:NoSuchObjectException o2)
+  Partition exchange_partition(1:map<string, string> partitionSpecs, 2:string source_db,
+      3:string source_table_name, 4:string dest_db, 5:string dest_table_name)
+      throws(1:MetaException o1, 2:NoSuchObjectException o2, 3:InvalidObjectException o3,
+      4:InvalidInputException o4)
+
+  list<Partition> exchange_partitions(1:map<string, string> partitionSpecs, 2:string source_db,
+      3:string source_table_name, 4:string dest_db, 5:string dest_table_name)
+      throws(1:MetaException o1, 2:NoSuchObjectException o2, 3:InvalidObjectException o3,
+      4:InvalidInputException o4)
+
+  Partition get_partition_with_auth(1:string db_name, 2:string tbl_name, 3:list<string> part_vals,
+      4: string user_name, 5: list<string> group_names) throws(1:MetaException o1, 2:NoSuchObjectException o2)
+
+  Partition get_partition_by_name(1:string db_name 2:string tbl_name, 3:string part_name)
+                       throws(1:MetaException o1, 2:NoSuchObjectException o2)
+
+  // returns all the partitions for this table in reverse chronological order.
+  // If max parts is given then it will return only that many.
+  list<Partition> get_partitions(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1)
+                       throws(1:NoSuchObjectException o1, 2:MetaException o2)
+  list<Partition> get_partitions_with_auth(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1,
+     4: string user_name, 5: list<string> group_names) throws(1:NoSuchObjectException o1, 2:MetaException o2)
+
+  list<PartitionSpec> get_partitions_pspec(1:string db_name, 2:string tbl_name, 3:i32 max_parts=-1)
+                       throws(1:NoSuchObjectException o1, 2:MetaException o2)
+
+  list<string> get_partition_names(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1)
+                       throws(1:MetaException o2)
+
+  // get_partition*_ps methods allow filtering by a partial partition specification,
+  // as needed for dynamic partitions. The values that are not restricted should
+  // be empty strings. Nulls were considered (instead of "") but caused errors in
+  // generated Python code. The size of part_vals may be smaller than the
+  // number of partition columns - the unspecified values are considered the same
+  // as "".
+  list<Partition> get_partitions_ps(1:string db_name 2:string tbl_name
+  	3:list<string> part_vals, 4:i16 max_parts=-1)
+                       throws(1:MetaException o1, 2:NoSuchObjectException o2)
+  list<Partition> get_partitions_ps_with_auth(1:string db_name, 2:string tbl_name, 3:list<string> part_vals, 4:i16 max_parts=-1,
+     5: string user_name, 6: list<string> group_names) throws(1:NoSuchObjectException o1, 2:MetaException o2)
+
+  list<string> get_partition_names_ps(1:string db_name,
+  	2:string tbl_name, 3:list<string> part_vals, 4:i16 max_parts=-1)
+  	                   throws(1:MetaException o1, 2:NoSuchObjectException o2)
+
+  // get the partitions matching the given partition filter
+  list<Partition> get_partitions_by_filter(1:string db_name 2:string tbl_name
+    3:string filter, 4:i16 max_parts=-1)
+                       throws(1:MetaException o1, 2:NoSuchObjectException o2)
+
+  // List partitions as PartitionSpec instances.
+  list<PartitionSpec> get_part_specs_by_filter(1:string db_name 2:string tbl_name
+    3:string filter, 4:i32 max_parts=-1)
+                       throws(1:MetaException o1, 2:NoSuchObjectException o2)
+
+  // get the partitions matching the given partition filter
+  // unlike get_partitions_by_filter, takes a serialized Hive expression, and with that can work
+  // with any filter (get_partitions_by_filter only works if the filter can be pushed down to JDOQL).
+  PartitionsByExprResult get_partitions_by_expr(1:PartitionsByExprRequest req)
+                       throws(1:MetaException o1, 2:NoSuchObjectException o2)
+
+  // get the number of partitions matching the given partition filter
+  i32 get_num_partitions_by_filter(1:string db_name 2:string tbl_name 3:string filter)
+                       throws(1:MetaException o1, 2:NoSuchObjectException o2)
+
+  // get partitions given a list of partition names
+  list<Partition> get_partitions_by_names(1:string db_name 2:string tbl_name 3:list<string> names)
+                       throws(1:MetaException o1, 2:NoSuchObjectException o2)
+
+  // changes the partition to the new partition object. partition is identified from the part values
+  // in the new_part
+  // * See notes on DDL_TIME
+  void alter_partition(1:string db_name, 2:string tbl_name, 3:Partition new_part)
+                       throws (1:InvalidOperationException o1, 2:MetaException o2)
+
+  // change a list of partitions. All partitions are altered atomically and all
+  // prehooks are fired together followed by all post hooks
+  void alter_partitions(1:string db_name, 2:string tbl_name, 3:list<Partition> new_parts)
+                       throws (1:InvalidOperationException o1, 2:MetaException o2)
+  void alter_partitions_with_environment_context(1:string db_name, 2:string tbl_name, 3:list<Partition> new_parts, 4:EnvironmentContext environment_context) throws (1:InvalidOperationException o1, 2:MetaException o2)
+
+  void alter_partition_with_environment_context(1:string db_name,
+      2:string tbl_name, 3:Partition new_part,
+      4:EnvironmentContext environment_context)
+      throws (1:InvalidOperationException o1, 2:MetaException o2)
+
+  // rename the old partition to the new partition object by changing old part values to the part values
+  // in the new_part. old partition is identified from part_vals.
+  // partition keys in new_part should be the same as those in old partition.
+  void rename_partition(1:string db_name, 2:string tbl_name, 3:list<string> part_vals, 4:Partition new_part)
+                       throws (1:InvalidOperationException o1, 2:MetaException o2)
+
+  // returns whether or not the partition name is valid based on the value of the config
+  // hive.metastore.partition.name.whitelist.pattern
+  bool partition_name_has_valid_characters(1:list<string> part_vals, 2:bool throw_exception)
+ 	throws(1: MetaException o1)
+
+  // gets the value of the configuration key in the metastore server. returns
+  // defaultValue if the key does not exist. if the configuration key does not
+  // begin with "hive", "mapred", or "hdfs", a ConfigValSecurityException is
+  // thrown.
+  string get_config_value(1:string name, 2:string defaultValue)
+                          throws(1:ConfigValSecurityException o1)
+
+  // converts a partition name into a partition values array
+  list<string> partition_name_to_vals(1: string part_name)
+                          throws(1: MetaException o1)
+  // converts a partition name into a partition specification (a mapping from
+  // the partition cols to the values)
+  map<string, string> partition_name_to_spec(1: string part_name)
+                          throws(1: MetaException o1)
+
+  void markPartitionForEvent(1:string db_name, 2:string tbl_name, 3:map<string,string> part_vals,
+                  4:PartitionEventType eventType) throws (1: MetaException o1, 2: NoSuchObjectException o2,
+                  3: UnknownDBException o3, 4: UnknownTableException o4, 5: UnknownPartitionException o5,
+                  6: InvalidPartitionException o6)
+  bool isPartitionMarkedForEvent(1:string db_name, 2:string tbl_name, 3:map<string,string> part_vals,
+                  4: PartitionEventType eventType) throws (1: MetaException o1, 2:NoSuchObjectException o2,
+                  3: UnknownDBException o3, 4: UnknownTableException o4, 5: UnknownPartitionException o5,
+                  6: InvalidPartitionException o6)
+
+  //index
+  Index add_index(1:Index new_index, 2: Table index_table)
+                       throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3)
+  void alter_index(1:string dbname, 2:string base_tbl_name, 3:string idx_name, 4:Index new_idx)
+                       throws (1:InvalidOperationException o1, 2:MetaException o2)
+  bool drop_index_by_name(1:string db_name, 2:string tbl_name, 3:string index_name, 4:bool deleteData)
+                       throws(1:NoSuchObjectException o1, 2:MetaException o2)
+  Index get_index_by_name(1:string db_name 2:string tbl_name, 3:string index_name)
+                       throws(1:MetaException o1, 2:NoSuchObjectException o2)
+
+  list<Index> get_indexes(1:string db_name, 2:string tbl_name, 3:i16 max_indexes=-1)
+                       throws(1:NoSuchObjectException o1, 2:MetaException o2)
+  list<string> get_index_names(1:string db_name, 2:string tbl_name, 3:i16 max_indexes=-1)
+                       throws(1:MetaException o2)
+
+ //primary keys and foreign keys
+  PrimaryKeysResponse get_primary_keys(1:PrimaryKeysRequest request)
+                       throws(1:MetaException o1, 2:NoSuchObjectException o2)
+  ForeignKeysResponse get_foreign_keys(1:ForeignKeysRequest request)
+                       throws(1:MetaException o1, 2:NoSuchObjectException o2)
+
+  // column statistics interfaces
+
+  // update APIs persist the column statistics object(s) that are passed in. If statistics already
+  // exists for one or more columns, the existing statistics will be overwritten. The update APIs
+  // validate that the dbName, tableName, partName, colName[] passed in as part of the ColumnStatistics
+  // struct are valid, throws InvalidInputException/NoSuchObjectException if found to be invalid
+  bool update_table_column_statistics(1:ColumnStatistics stats_obj) throws (1:NoSuchObjectException o1,
+              2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
+  bool update_partition_column_statistics(1:ColumnStatistics stats_obj) throws (1:NoSuchObjectException o1,
+              2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
+
+  // get APIs return the column statistics corresponding to db_name, tbl_name, [part_name], col_name if
+  // such statistics exists. If the required statistics doesn't exist, get APIs throw NoSuchObjectException
+  // For instance, if get_table_column_statistics is called on a partitioned table for which only
+  // partition level column stats exist, get_table_column_statistics will throw NoSuchObjectException
+  ColumnStatistics get_table_column_statistics(1:string db_name, 2:string tbl_name, 3:string col_name) throws
+              (1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidInputException o3, 4:InvalidObjectException o4)
+  ColumnStatistics get_partition_column_statistics(1:string db_name, 2:string tbl_name, 3:string part_name,
+               4:string col_name) throws (1:NoSuchObjectException o1, 2:MetaException o2,
+               3:InvalidInputException o3, 4:InvalidObjectException o4)
+  TableStatsResult get_table_statistics_req(1:TableStatsRequest request) throws
+              (1:NoSuchObjectException o1, 2:MetaException o2)
+  PartitionsStatsResult get_partitions_statistics_req(1:PartitionsStatsRequest request) throws
+              (1:NoSuchObjectException o1, 2:MetaException o2)
+  AggrStats get_aggr_stats_for(1:PartitionsStatsRequest request) throws
+              (1:NoSuchObjectException o1, 2:MetaException o2)
+  bool set_aggr_stats_for(1:SetPartitionsStatsRequest request) throws
+              (1:NoSuchObjectException o1, 2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
+
+
+  // delete APIs attempt to delete column statistics, if found, associated with a given db_name, tbl_name, [part_name]
+  // and col_name. If the delete API doesn't find the statistics record in the metastore, throws NoSuchObjectException
+  // Delete API validates the input and if the input is invalid throws InvalidInputException/InvalidObjectException.
+  bool delete_partition_column_statistics(1:string db_name, 2:string tbl_name, 3:string part_name, 4:string col_name) throws
+              (1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidObjectException o3,
+               4:InvalidInputException o4)
+  bool delete_table_column_statistics(1:string db_name, 2:string tbl_name, 3:string col_name) throws
+              (1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidObjectException o3,
+               4:InvalidInputException o4)
+
+  //
+  // user-defined functions
+  //
+
+  void create_function(1:Function func)
+      throws (1:AlreadyExistsException o1,
+              2:InvalidObjectException o2,
+              3:MetaException o3,
+              4:NoSuchObjectException o4)
+
+  void drop_function(1:string dbName, 2:string funcName)
+      throws (1:NoSuchObjectException o1, 2:MetaException o3)
+
+  void alter_function(1:string dbName, 2:string funcName, 3:Function newFunc)
+      throws (1:InvalidOperationException o1, 2:MetaException o2)
+
+  list<string> get_functions(1:string dbName, 2:string pattern)
+      throws (1:MetaException o1)
+  Function get_function(1:string dbName, 2:string funcName)
+      throws (1:MetaException o1, 2:NoSuchObjectException o2)
+
+  GetAllFunctionsResponse get_all_functions() throws (1:MetaException o1)
+
+  //authorization privileges
+
+  bool create_role(1:Role role) throws(1:MetaException o1)
+  bool drop_role(1:string role_name) throws(1:MetaException o1)
+  list<string> get_role_names() throws(1:MetaException o1)
+  // Deprecated, use grant_revoke_role()
+  bool grant_role(1:string role_name, 2:string principal_name, 3:PrincipalType principal_type,
+    4:string grantor, 5:PrincipalType grantorType, 6:bool grant_option) throws(1:MetaException o1)
+  // Deprecated, use grant_revoke_role()
+  bool revoke_role(1:string role_name, 2:string principal_name, 3:PrincipalType principal_type)
+                        throws(1:MetaException o1)
+  list<Role> list_roles(1:string principal_name, 2:PrincipalType principal_type) throws(1:MetaException o1)
+  GrantRevokeRoleResponse grant_revoke_role(1:GrantRevokeRoleRequest request) throws(1:MetaException o1)
+
+  // get all role-grants for users/roles that have been granted the given role
+  // Note that in the returned list of RolePrincipalGrants, the roleName is
+  // redundant as it would match the role_name argument of this function
+  GetPrincipalsInRoleResponse get_principals_in_role(1: GetPrincipalsInRoleRequest request) throws(1:MetaException o1)
+
+  // get grant information of all roles granted to the given principal
+  // Note that in the returned list of RolePrincipalGrants, the principal name,type is
+  // redundant as it would match the principal name,type arguments of this function
+  GetRoleGrantsForPrincipalResponse get_role_grants_for_principal(1: GetRoleGrantsForPrincipalRequest request) throws(1:MetaException o1)
+
+  PrincipalPrivilegeSet get_privilege_set(1:HiveObjectRef hiveObject, 2:string user_name,
+    3: list<string> group_names) throws(1:MetaException o1)
+  list<HiveObjectPrivilege> list_privileges(1:string principal_name, 2:PrincipalType principal_type,
+    3: HiveObjectRef hiveObject) throws(1:MetaException o1)
+
+  // Deprecated, use grant_revoke_privileges()
+  bool grant_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1)
+  // Deprecated, use grant_revoke_privileges()
+  bool revoke_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1)
+  GrantRevokePrivilegeResponse grant_revoke_privileges(1:GrantRevokePrivilegeRequest request) throws(1:MetaException o1);
+
+  // this is used by metastore client to send UGI information to metastore server immediately
+  // after setting up a connection.
+  list<string> set_ugi(1:string user_name, 2:list<string> group_names) throws (1:MetaException o1)
+
+  //Authentication (delegation token) interfaces
+
+  // get metastore server delegation token for use from the map/reduce tasks to authenticate
+  // to metastore server
+  string get_delegation_token(1:string token_owner, 2:string renewer_kerberos_principal_name)
+    throws (1:MetaException o1)
+
+  // method to renew delegation token obtained from metastore server
+  i64 renew_delegation_token(1:string token_str_form) throws (1:MetaException o1)
+
+  // method to cancel delegation token obtained from metastore server
+  void cancel_delegation_token(1:string token_str_form) throws (1:MetaException o1)
+
+  // add a delegation token
+  bool add_token(1:string token_identifier, 2:string delegation_token)
+
+  // remove a delegation token
+  bool remove_token(1:string token_identifier)
+
+  // get a delegation token by identifier
+  string get_token(1:string token_identifier)
+
+  // get all delegation token identifiers
+  list<string> get_all_token_identifiers()
+
+  // add master key
+  i32 add_master_key(1:string key) throws (1:MetaException o1)
+
+  // update master key
+  void update_master_key(1:i32 seq_number, 2:string key) throws (1:NoSuchObjectException o1, 2:MetaException o2)
+
+  // remove master key
+  bool remove_master_key(1:i32 key_seq)
+
+  // get master keys
+  list<string> get_master_keys()
+
+  // Transaction and lock management calls
+  // Get just list of open transactions
+  GetOpenTxnsResponse get_open_txns()
+  // Get list of open transactions with state (open, aborted)
+  GetOpenTxnsInfoResponse get_open_txns_info()
+  OpenTxnsResponse open_txns(1:OpenTxnRequest rqst)
+  void abort_txn(1:AbortTxnRequest rqst) throws (1:NoSuchTxnException o1)
+  void abort_txns(1:AbortTxnsRequest rqst) throws (1:NoSuchTxnException o1)
+  void commit_txn(1:CommitTxnRequest rqst) throws (1:NoSuchTxnException o1, 2:TxnAbortedException o2)
+  LockResponse lock(1:LockRequest rqst) throws (1:NoSuchTxnException o1, 2:TxnAbortedException o2)
+  LockResponse check_lock(1:CheckLockRequest rqst)
+    throws (1:NoSuchTxnException o1, 2:TxnAbortedException o2, 3:NoSuchLockException o3)
+  void unlock(1:UnlockRequest rqst) throws (1:NoSuchLockException o1, 2:TxnOpenException o2)
+  ShowLocksResponse show_locks(1:ShowLocksRequest rqst)
+  void heartbeat(1:HeartbeatRequest ids) throws (1:NoSuchLockException o1, 2:NoSuchTxnException o2, 3:TxnAbortedException o3)
+  HeartbeatTxnRangeResponse heartbeat_txn_range(1:HeartbeatTxnRangeRequest txns)
+  void compact(1:CompactionRequest rqst) 
+  CompactionResponse compact2(1:CompactionRequest rqst) 
+  ShowCompactResponse show_compact(1:ShowCompactRequest rqst)
+  void add_dynamic_partitions(1:AddDynamicPartitions rqst) throws (1:NoSuchTxnException o1, 2:TxnAbortedException o2)
+
+  // Notification logging calls
+  NotificationEventResponse get_next_notification(1:NotificationEventRequest rqst) 
+  CurrentNotificationEventId get_current_notificationEventId()
+  FireEventResponse fire_listener_event(1:FireEventRequest rqst)
+  void flushCache()
+
+  GetFileMetadataByExprResult get_file_metadata_by_expr(1:GetFileMetadataByExprRequest req)
+  GetFileMetadataResult get_file_metadata(1:GetFileMetadataRequest req)
+  PutFileMetadataResult put_file_metadata(1:PutFileMetadataRequest req)
+  ClearFileMetadataResult clear_file_metadata(1:ClearFileMetadataRequest req)
+  CacheFileMetadataResult cache_file_metadata(1:CacheFileMetadataRequest req)
+
+}
+
+// * Note about the DDL_TIME: When creating or altering a table or a partition,
+// if the DDL_TIME is not set, the current time will be used.
+
+// For storing info about archived partitions in parameters
+
+// Whether the partition is archived
+const string IS_ARCHIVED = "is_archived",
+// The original location of the partition, before archiving. After archiving,
+// this directory will contain the archive. When the partition
+// is dropped, this directory will be deleted
+const string ORIGINAL_LOCATION = "original_location",
+
+// Whether or not the table is considered immutable - immutable tables can only be
+// overwritten or created if unpartitioned, or if partitioned, partitions inside them
+// can only be overwritten or created. Immutability supports write-once and replace
+// semantics, but not append.
+const string IS_IMMUTABLE = "immutable",
+
+// these should be needed only for backward compatibility with filestore
+const string META_TABLE_COLUMNS   = "columns",
+const string META_TABLE_COLUMN_TYPES   = "columns.types",
+const string BUCKET_FIELD_NAME    = "bucket_field_name",
+const string BUCKET_COUNT         = "bucket_count",
+const string FIELD_TO_DIMENSION   = "field_to_dimension",
+const string META_TABLE_NAME      = "name",
+const string META_TABLE_DB        = "db",
+const string META_TABLE_LOCATION  = "location",
+const string META_TABLE_SERDE     = "serde",
+const string META_TABLE_PARTITION_COLUMNS = "partition_columns",
+const string META_TABLE_PARTITION_COLUMN_TYPES = "partition_columns.types",
+const string FILE_INPUT_FORMAT    = "file.inputformat",
+const string FILE_OUTPUT_FORMAT   = "file.outputformat",
+const string META_TABLE_STORAGE   = "storage_handler",
+const string TABLE_IS_TRANSACTIONAL = "transactional",
+const string TABLE_NO_AUTO_COMPACT = "no_auto_compaction",
+const string TABLE_TRANSACTIONAL_PROPERTIES = "transactional_properties",
+


[5/5] kudu git commit: KUDU-2191 (3/n): Add mini HMS option to external mini cluster

Posted by to...@apache.org.
KUDU-2191 (3/n): Add mini HMS option to external mini cluster

A couple of features are explicitly being punted on for now:

- If kerberos and the HMS are both configured, the HMS should be
  kerberized as well. This will come in a follow-up commit (the
  current HMS C++ client can't be used against a kerberized HMS).
- No API is provided in the mini-cluster tool to retrieve the HMS
  address. This will be added if/when it becomes necessary.

Change-Id: Iaa1da05b13a120f2d4b739b6ab709e6315c955c0
Reviewed-on: http://gerrit.cloudera.org:8080/8304
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b418e88b
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b418e88b
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b418e88b

Branch: refs/heads/master
Commit: b418e88b657c91fd53c78cb5567e93a7348042f6
Parents: 31d16f7
Author: Dan Burkert <da...@apache.org>
Authored: Tue Oct 17 13:02:30 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Fri Nov 3 00:09:20 2017 +0000

----------------------------------------------------------------------
 src/kudu/mini-cluster/CMakeLists.txt            |  5 +-
 .../mini-cluster/external_mini_cluster-test.cc  | 56 +++++++++++++++++---
 src/kudu/mini-cluster/external_mini_cluster.cc  |  8 +++
 src/kudu/mini-cluster/external_mini_cluster.h   | 14 +++++
 src/kudu/tools/tool.proto                       |  3 ++
 src/kudu/tools/tool_action_test.cc              |  1 +
 6 files changed, 78 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b418e88b/src/kudu/mini-cluster/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/CMakeLists.txt b/src/kudu/mini-cluster/CMakeLists.txt
index 57f945d..c3be379 100644
--- a/src/kudu/mini-cluster/CMakeLists.txt
+++ b/src/kudu/mini-cluster/CMakeLists.txt
@@ -27,13 +27,14 @@ target_link_libraries(mini_cluster
   glog
   gmock
   gutil
+  krpc
   kudu_client
   kudu_common
   kudu_test_util
   kudu_util
-  krpc
   master
   master_proto
+  mini_hms
   mini_kdc
   server_base_proto
   tablet_proto
@@ -47,5 +48,5 @@ add_dependencies(mini_cluster
   kudu-master)
 
 # Tests
-set(KUDU_TEST_LINK_LIBS mini_cluster ${KUDU_MIN_TEST_LIBS})
+set(KUDU_TEST_LINK_LIBS mini_cluster kudu_hms ${KUDU_MIN_TEST_LIBS})
 ADD_KUDU_TEST(external_mini_cluster-test RESOURCE_LOCK "master-rpc-ports")

http://git-wip-us.apache.org/repos/asf/kudu/blob/b418e88b/src/kudu/mini-cluster/external_mini_cluster-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/external_mini_cluster-test.cc b/src/kudu/mini-cluster/external_mini_cluster-test.cc
index d257ba0..cb1bef3 100644
--- a/src/kudu/mini-cluster/external_mini_cluster-test.cc
+++ b/src/kudu/mini-cluster/external_mini_cluster-test.cc
@@ -15,15 +15,19 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <iosfwd>
 #include <string>
 #include <utility>
 #include <vector>
 
 #include <glog/logging.h>
+#include <glog/stl_logging.h> // IWYU pragma: keep
 #include <gtest/gtest.h>
 
 #include "kudu/gutil/strings/substitute.h"
-#include "kudu/gutil/strings/util.h"
+#include "kudu/gutil/strings/util.h" // IWYU pragma: keep
+#include "kudu/hms/hms_client.h"
+#include "kudu/hms/mini_hms.h"
 #include "kudu/mini-cluster/external_mini_cluster.h"
 #include "kudu/mini-cluster/mini_cluster.h"
 #include "kudu/security/test/mini_kdc.h"
@@ -34,22 +38,50 @@
 #include "kudu/util/test_util.h"
 
 namespace kudu {
-
 namespace cluster {
 
+using std::make_pair;
+using std::pair;
 using std::string;
+using std::vector;
 using strings::Substitute;
 
-enum KerberosMode {
-  WITHOUT_KERBEROS, WITH_KERBEROS
+enum class Kerberos {
+  ENABLED,
+  DISABLED,
+};
+
+enum HiveMetastore {
+  ENABLED,
+  DISABLED,
 };
 
+// Beautifies CLI test output.
+std::ostream& operator<<(std::ostream& o, Kerberos k) {
+  switch (k) {
+    case Kerberos::ENABLED: return o << "Kerberos::ENABLED";
+    case Kerberos::DISABLED: return o << "Kerberos::DISABLED";
+  }
+  return o;
+}
+std::ostream& operator<<(std::ostream& o, HiveMetastore k) {
+  switch (k) {
+    case HiveMetastore::ENABLED: return o << "HiveMetastore::ENABLED";
+    case HiveMetastore::DISABLED: return o << "HiveMetastore::DISABLED";
+  }
+  return o;
+}
+
 class ExternalMiniClusterTest : public KuduTest,
-                                public testing::WithParamInterface<KerberosMode> {};
+                                public testing::WithParamInterface<pair<Kerberos, HiveMetastore>> {
+};
 
+// TODO(dan): Add ENABLED/ENABLED when the mini HMS supports Kerberos.
 INSTANTIATE_TEST_CASE_P(KerberosOnAndOff,
                         ExternalMiniClusterTest,
-                        ::testing::Values(WITHOUT_KERBEROS, WITH_KERBEROS));
+                        testing::Values(make_pair(Kerberos::DISABLED, HiveMetastore::DISABLED),
+                                        make_pair(Kerberos::ENABLED, HiveMetastore::DISABLED),
+                                        make_pair(Kerberos::DISABLED, HiveMetastore::ENABLED)));
 
 void SmokeTestKerberizedCluster(ExternalMiniClusterOptions opts) {
   ASSERT_TRUE(opts.enable_kerberos);
@@ -90,7 +122,8 @@ TEST_F(ExternalMiniClusterTest, TestKerberosReacquire) {
 
 TEST_P(ExternalMiniClusterTest, TestBasicOperation) {
   ExternalMiniClusterOptions opts;
-  opts.enable_kerberos = GetParam() == WITH_KERBEROS;
+  opts.enable_kerberos = GetParam().first == Kerberos::ENABLED;
+  opts.enable_hive_metastore = GetParam().second == HiveMetastore::ENABLED;
 
   // Hard-coded RPC ports for the masters. This is safe, as this unit test
   // runs under a resource lock (see CMakeLists.txt in this directory).
@@ -170,6 +203,15 @@ TEST_P(ExternalMiniClusterTest, TestBasicOperation) {
                         ".*No such file or directory)");
   }
 
+  // Verify that the HMS is reachable.
+  if (opts.enable_hive_metastore) {
+    hms::HmsClient hms_client(cluster.hms()->address());
+    ASSERT_OK(hms_client.Start());
+    vector<string> tables;
+    ASSERT_OK(hms_client.GetAllTables("default", &tables));
+    ASSERT_TRUE(tables.empty()) << "tables: " << tables;
+  }
+
   // Test that if we inject a fault into a tablet server's boot process
   // ExternalTabletServer::Restart() still returns OK, even if the tablet server crashed.
   ts->Shutdown();

http://git-wip-us.apache.org/repos/asf/kudu/blob/b418e88b/src/kudu/mini-cluster/external_mini_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/external_mini_cluster.cc b/src/kudu/mini-cluster/external_mini_cluster.cc
index dae51a3..a9bd52c 100644
--- a/src/kudu/mini-cluster/external_mini_cluster.cc
+++ b/src/kudu/mini-cluster/external_mini_cluster.cc
@@ -39,6 +39,7 @@
 #include "kudu/gutil/strings/join.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h"
+#include "kudu/hms/mini_hms.h"
 #include "kudu/master/master.pb.h"
 #include "kudu/master/master.proxy.h"
 #include "kudu/rpc/messenger.h"
@@ -102,6 +103,7 @@ ExternalMiniClusterOptions::ExternalMiniClusterOptions()
       bind_mode(MiniCluster::kDefaultBindMode),
       num_data_dirs(1),
       enable_kerberos(false),
+      enable_hive_metastore(false),
       logtostderr(true),
       start_process_timeout(MonoDelta::FromSeconds(30)) {
 }
@@ -176,6 +178,12 @@ Status ExternalMiniCluster::Start() {
                           "could not set krb5 client env");
   }
 
+  if (opts_.enable_hive_metastore) {
+    hms_.reset(new hms::MiniHms());
+    RETURN_NOT_OK_PREPEND(hms_->Start(),
+                          "Failed to start the Hive Metastore");
+  }
+
   if (opts_.num_masters != 1) {
     RETURN_NOT_OK_PREPEND(StartDistributedMasters(),
                           "Failed to add distributed masters");

http://git-wip-us.apache.org/repos/asf/kudu/blob/b418e88b/src/kudu/mini-cluster/external_mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/external_mini_cluster.h b/src/kudu/mini-cluster/external_mini_cluster.h
index 789467f..f879949 100644
--- a/src/kudu/mini-cluster/external_mini_cluster.h
+++ b/src/kudu/mini-cluster/external_mini_cluster.h
@@ -52,6 +52,10 @@ class KuduClient;
 class KuduClientBuilder;
 } // namespace client
 
+namespace hms {
+class MiniHms;
+} // namespace hms
+
 namespace master {
 class MasterServiceProxy;
 } // namespace master
@@ -132,6 +136,11 @@ struct ExternalMiniClusterOptions {
   // Default: false.
   bool enable_kerberos;
 
+  // If true, set up a Hive Metastore as part of this ExternalMiniCluster.
+  //
+  // Default: false.
+  bool enable_hive_metastore;
+
   // If true, sends logging output to stderr instead of a log file.
   //
   // Default: true.
@@ -234,6 +243,10 @@ class ExternalMiniCluster : public MiniCluster {
     return kdc_.get();
   }
 
+  hms::MiniHms* hms() const {
+    return hms_.get();
+  }
+
   const std::string& data_root() const {
     return opts_.data_root;
   }
@@ -343,6 +356,7 @@ class ExternalMiniCluster : public MiniCluster {
   std::vector<scoped_refptr<ExternalMaster> > masters_;
   std::vector<scoped_refptr<ExternalTabletServer> > tablet_servers_;
   std::unique_ptr<MiniKdc> kdc_;
+  std::unique_ptr<hms::MiniHms> hms_;
 
   std::shared_ptr<rpc::Messenger> messenger_;
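
The new option and accessor are designed to be used together from tests. A minimal sketch of the resulting pattern, mirroring external_mini_cluster-test.cc above (the constructor form is assumed; the HmsClient calls are the Status-returning ones used in that test):

  ExternalMiniClusterOptions opts;
  opts.enable_hive_metastore = true;             // option added by this patch

  ExternalMiniCluster cluster(opts);             // constructor form assumed
  ASSERT_OK(cluster.Start());                    // Start() also brings up the mini HMS

  // hms() exposes the MiniHms, so tests can talk to the metastore directly.
  hms::HmsClient hms_client(cluster.hms()->address());
  ASSERT_OK(hms_client.Start());

  std::vector<std::string> tables;
  ASSERT_OK(hms_client.GetAllTables("default", &tables));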
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b418e88b/src/kudu/tools/tool.proto
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool.proto b/src/kudu/tools/tool.proto
index ff0b0db..8a30bf1 100644
--- a/src/kudu/tools/tool.proto
+++ b/src/kudu/tools/tool.proto
@@ -40,6 +40,9 @@ message CreateClusterRequestPB {
   // Whether or not the cluster should be Kerberized.
   optional bool enable_kerberos = 3;
 
+  // Whether or not to create a Hive Metastore and enable the HMS integration.
+  optional bool enable_hive_metastore = 7;
+
   // The directory where the cluster's data and logs should be placed.
   optional string data_root = 4;
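
A hypothetical snippet showing how a caller might populate the extended request; it assumes the usual protoc-generated C++ setters and that this file's package is kudu.tools:

  kudu::tools::CreateClusterRequestPB create;
  create.set_enable_kerberos(false);
  create.set_enable_hive_metastore(true);         // field 7, added by this patch
  create.set_data_root("/tmp/minicluster-data");  // hypothetical path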
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b418e88b/src/kudu/tools/tool_action_test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_test.cc b/src/kudu/tools/tool_action_test.cc
index 0373577..fd8a7cf 100644
--- a/src/kudu/tools/tool_action_test.cc
+++ b/src/kudu/tools/tool_action_test.cc
@@ -157,6 +157,7 @@ Status ProcessRequest(const ControlShellRequestPB& req,
         opts.num_tablet_servers = cc.num_tservers();
       }
       opts.enable_kerberos = cc.enable_kerberos();
+      opts.enable_hive_metastore = cc.enable_hive_metastore();
       if (cc.has_data_root()) {
         opts.data_root = cc.data_root();
       } else {


[2/5] kudu git commit: scripts: better documentation for get-job-stats-from-mysql.py

Posted by to...@apache.org.
scripts: better documentation for get-job-stats-from-mysql.py

The parameter named 'job_name' was (confusingly) substituted in as the
query's workload.

Change-Id: I3d26c783d66b9420d5ced50363c9421e7c1944bc
Reviewed-on: http://gerrit.cloudera.org:8080/8453
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d8c39d24
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d8c39d24
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d8c39d24

Branch: refs/heads/master
Commit: d8c39d24b4d88786e7294b9c002c72219a946ca5
Parents: 3e86797
Author: Adar Dembo <ad...@cloudera.com>
Authored: Thu Nov 2 12:22:49 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Fri Nov 3 00:05:41 2017 +0000

----------------------------------------------------------------------
 src/kudu/scripts/get-job-stats-from-mysql.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/d8c39d24/src/kudu/scripts/get-job-stats-from-mysql.py
----------------------------------------------------------------------
diff --git a/src/kudu/scripts/get-job-stats-from-mysql.py b/src/kudu/scripts/get-job-stats-from-mysql.py
index 5850a8a..5748886 100644
--- a/src/kudu/scripts/get-job-stats-from-mysql.py
+++ b/src/kudu/scripts/get-job-stats-from-mysql.py
@@ -16,13 +16,29 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+#
+# Fetches the last N days' worth of stats for a particular workload from the
+# MySQL database housing test performance stats.
+#
+# Here's the database schema for kudu_perf_tpch:
+#
+# +--------------+--------------+------+-----+-------------------+-------+
+# | Field        | Type         | Null | Key | Default           | Extra |
+# +--------------+--------------+------+-----+-------------------+-------+
+# | job_name     | varchar(50)  | YES  |     | NULL              |       |
+# | build_number | int(11)      | YES  |     | NULL              |       |
+# | workload     | varchar(100) | YES  |     | NULL              |       |
+# | iteration    | int(2)       | YES  |     | NULL              |       |
+# | runtime      | float        | YES  |     | NULL              |       |
+# | curr_date    | timestamp    | NO   |     | CURRENT_TIMESTAMP |       |
+# +--------------+--------------+------+-----+-------------------+-------+
 
 import MySQLdb as mdb
 import sys
 import os
 
 if len(sys.argv) < 3:
-  sys.exit("usage: %s <job_name> <days_count_to_fetch>" % sys.argv[0])
+  sys.exit("usage: %s <workload> <days_count_to_fetch>" % sys.argv[0])
 
 host = os.environ["MYSQLHOST"]
 user = os.environ["MYSQLUSER"]
@@ -32,9 +48,9 @@ db = os.environ["MYSQLDB"]
 con = mdb.connect(host, user, pwd, db)
 with con:
   cur = con.cursor()
-  job_name = sys.argv[1]
+  workload = sys.argv[1]
   days = sys.argv[2]
-  cur.execute("select workload, runtime, build_number from kudu_perf_tpch where workload like %s AND curr_date >= DATE_SUB(NOW(), INTERVAL %s DAY) and runtime != 0 ORDER BY workload, build_number, curr_date", (job_name, days))
+  cur.execute("select workload, runtime, build_number from kudu_perf_tpch where workload like %s AND curr_date >= DATE_SUB(NOW(), INTERVAL %s DAY) and runtime != 0 ORDER BY workload, build_number, curr_date", (workload, days))
   rows = cur.fetchall()
   print 'workload', '\t', 'runtime', '\t', 'build_number'
   for row in rows: