Posted to commits@kudu.apache.org by gr...@apache.org on 2019/08/08 20:57:35 UTC

[kudu] branch master updated: Prepare for upgrading to Hive 3

This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 76b80ec  Prepare for upgrading to Hive 3
76b80ec is described below

commit 76b80ec01e152f1f9b54ac5a959d7f0cf5ac9f93
Author: Grant Henke <gr...@apache.org>
AuthorDate: Tue Aug 6 08:18:05 2019 -0500

    Prepare for upgrading to Hive 3
    
    This patch contains build and test changes that work on both Hive 2 and
    Hive 3 to minimize the changes required when upgrading to Hive 3.
    
    - Uses the Hive `schematool` to initialize the Derby database in the
    MiniHms. This fixes issues with autoCreate and is more representative
    of a production environment (the equivalent invocation is shown below).
    - Adjusts the logging configuration in the MiniHms to be more explicit.
    - Works around HADOOP-8719 by hardcoding the HADOOP_OS_TYPE.
    - Works around HIVE-21614 by using `LIKE` instead of `=` when filtering
    tables.
    
    Change-Id: If43ae2330b3d99374c68bae313a3f8bc070f9c69
    Reviewed-on: http://gerrit.cloudera.org:8080/14018
    Reviewed-by: Hao Hao <ha...@cloudera.com>
    Tested-by: Kudu Jenkins
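
    Concretely, the first bullet amounts to running the following command
    before the metastore starts (a sketch: in the patch the binary path is
    built from the hive-home symlink and the call goes through
    Subprocess::Call, as shown in the mini_hms.cc diff below):

        $HIVE_HOME/bin/schematool -dbType derby -initSchema
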
---
 src/kudu/hms/CMakeLists.txt     |  5 +++-
 src/kudu/hms/hms_catalog.cc     |  3 ++-
 src/kudu/hms/hms_client-test.cc |  3 ++-
 src/kudu/hms/mini_hms.cc        | 51 ++++++++++++++++++++++++++---------------
 src/kudu/hms/mini_hms.h         |  3 +++
 src/kudu/util/subprocess.cc     |  8 ++++++-
 src/kudu/util/subprocess.h      |  7 +++++-
 thirdparty/package-hadoop.sh    |  1 +
 8 files changed, 58 insertions(+), 23 deletions(-)

diff --git a/src/kudu/hms/CMakeLists.txt b/src/kudu/hms/CMakeLists.txt
index 287307d..d23ff4b 100644
--- a/src/kudu/hms/CMakeLists.txt
+++ b/src/kudu/hms/CMakeLists.txt
@@ -51,6 +51,8 @@ target_link_libraries(kudu_hms ${HMS_DEPS})
 # mini_hms
 ##############################
 
+# Link the home directories, so that they can be found via
+# test_util::FindHomeDir in MiniHms::Start.
 execute_process(COMMAND ln -nsf
                 "${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hive"
                 "${EXECUTABLE_OUTPUT_PATH}/hive-home")
@@ -63,7 +65,8 @@ execute_process(COMMAND ln -nsf
 
 file(GLOB DEPENDENCY_JARS
   "${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hive/lib/*"
-  "${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hadoop/share/hadoop/common/*")
+  "${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hadoop/share/hadoop/common/*"
+  "${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hadoop/share/hadoop/common/lib/*")
 
 add_jar(hms-plugin
   "${CMAKE_SOURCE_DIR}/java/kudu-hive/src/main/java/org/apache/kudu/hive/metastore/KuduMetastorePlugin.java"
diff --git a/src/kudu/hms/hms_catalog.cc b/src/kudu/hms/hms_catalog.cc
index 686bcdd..197bf99 100644
--- a/src/kudu/hms/hms_catalog.cc
+++ b/src/kudu/hms/hms_catalog.cc
@@ -230,9 +230,10 @@ Status HmsCatalog::GetKuduTables(vector<hive::Table>* kudu_tables) {
     for (const auto& database_name : database_names) {
       table_names.clear();
       tables.clear();
+      // NOTE: LIKE filters are used instead of = filters due to HIVE-21614
       RETURN_NOT_OK(client->GetTableNames(
             database_name,
-            Substitute("$0$1 = \"$2\" OR $0$1 = \"$3\"",
+            Substitute("$0$1 LIKE \"$2\" OR $0$1 LIKE \"$3\"",
               HmsClient::kHiveFilterFieldParams,
               HmsClient::kStorageHandlerKey,
               HmsClient::kKuduStorageHandler,
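
For reference, assuming the constant values defined in HmsClient
(kHiveFilterFieldParams = "hive_filter_field_params__", kStorageHandlerKey =
"storage_handler", plus the current and legacy Kudu handler class names), the
filter built above expands to roughly:

    hive_filter_field_params__storage_handler LIKE
        "org.apache.kudu.hive.KuduStorageHandler"
    OR hive_filter_field_params__storage_handler LIKE
        "com.cloudera.kudu.hive.KuduStorageHandler"

Neither class name contains a LIKE wildcard (% or _), so these predicates
select exactly the same tables as the former = comparisons; the swap merely
avoids the CLOB equality comparison that HIVE-21614 tracks. The same
substitution is made in the hms_client-test.cc filter below.
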
diff --git a/src/kudu/hms/hms_client-test.cc b/src/kudu/hms/hms_client-test.cc
index 6fbc121..5375b69 100644
--- a/src/kudu/hms/hms_client-test.cc
+++ b/src/kudu/hms/hms_client-test.cc
@@ -223,9 +223,10 @@ TEST_P(HmsClientTest, TestHmsOperations) {
       << "table names: " << table_names;
 
   // Get filtered table names.
+  // NOTE: LIKE filters are used instead of = filters due to HIVE-21614
   table_names.clear();
   string filter = Substitute(
-      "$0$1 = \"$2\"", HmsClient::kHiveFilterFieldParams,
+      "$0$1 LIKE \"$2\"", HmsClient::kHiveFilterFieldParams,
       HmsClient::kStorageHandlerKey, HmsClient::kKuduStorageHandler);
   ASSERT_OK(client.GetTableNames(database_name, filter, &table_names));
   std::sort(table_names.begin(), table_names.end());
diff --git a/src/kudu/hms/mini_hms.cc b/src/kudu/hms/mini_hms.cc
index 1400f60..98db795 100644
--- a/src/kudu/hms/mini_hms.cc
+++ b/src/kudu/hms/mini_hms.cc
@@ -149,12 +149,25 @@ Status MiniHms::Start() {
   map<string, string> env_vars {
       { "JAVA_HOME", java_home },
       { "HADOOP_HOME", hadoop_home },
+      { "HADOOP_CONF_DIR",  Substitute("$0/etc/hadoop", hadoop_home) },
       { "HIVE_AUX_JARS_PATH", aux_jars },
       { "HIVE_CONF_DIR", data_root_ },
       { "JAVA_TOOL_OPTIONS", java_options },
       { "HADOOP_CONF_DIR", data_root_ },
+      // Set HADOOP_OS_TYPE=Linux due to HADOOP-8719.
+      // TODO(ghenke): Remove after HADOOP-15966 is available (Hadoop 3.1.3+)
+      { "HADOOP_OS_TYPE", "Linux" }
   };
 
+  if (!schema_initialized_) {
+    // Run the schematool to initialize the database.
+    RETURN_NOT_OK(Subprocess::Call({Substitute("$0/bin/schematool", hive_home),
+                                    "-dbType", "derby", "-initSchema"}, "",
+                                   nullptr, nullptr,
+                                   env_vars));
+    schema_initialized_ = true;
+  }
+
   // Start the HMS.
   hms_process_.reset(new Subprocess({
         Substitute("$0/bin/hive", hive_home),
@@ -215,10 +228,6 @@ Status MiniHms::CreateHiveSite() const {
   const string listeners = Substitute("org.apache.hive.hcatalog.listener.DbNotificationListener$0",
       enable_kudu_plugin_ ? ",org.apache.kudu.hive.metastore.KuduMetastorePlugin" : "");
 
-  // - datanucleus.schema.autoCreateAll
-  // - hive.metastore.schema.verification
-  //     Allow Hive to startup and run without first running the schemaTool.
-  //
   // - hive.metastore.event.db.listener.timetolive
   //     Configures how long the Metastore will store notification log events
   //     before GCing them.
@@ -239,6 +248,14 @@ Status MiniHms::CreateHiveSite() const {
   //     Configures the HMS to add the entire thrift Table/Partition
   //     objects to the HMS notifications.
   //
+  // - hive.metastore.event.db.notification.api.auth
+  //     Disables the authorization on the DbNotificationListener related
+  //     metastore APIs such as get_next_notification. If set to true, then
+  //     only the superusers in proxy settings have the permission.
+  //
+  // - hive.log4j.file
+  //     Configures the location of the HMS log4j configuration.
+  //
   static const string kHiveFileTemplate = R"(
 <configuration>
   <property>
@@ -249,16 +266,6 @@ Status MiniHms::CreateHiveSite() const {
   </property>
 
   <property>
-    <name>datanucleus.schema.autoCreateAll</name>
-    <value>true</value>
-  </property>
-
-  <property>
-    <name>hive.metastore.schema.verification</name>
-    <value>false</value>
-  </property>
-
-  <property>
     <name>hive.metastore.warehouse.dir</name>
     <value>file://$2/warehouse/</value>
   </property>
@@ -308,8 +315,17 @@ Status MiniHms::CreateHiveSite() const {
     <value>true</value>
   </property>
 
-  $7
+  <property>
+    <name>hive.metastore.event.db.notification.api.auth</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <name>hive.log4j.file</name>
+    <value>$7</value>
+  </property>
 
+  $8
 </configuration>
   )";
 
@@ -367,6 +383,7 @@ Status MiniHms::CreateHiveSite() const {
                                          keytab_file_,
                                          service_principal_,
                                          SaslProtection::name_of(protection_),
+                                         JoinPathSegments(data_root_, "hive-log4j2.properties"),
                                          sentry_properties);
 
   if (IsAuthorizationEnabled()) {
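
The two hunks above are linked by position: the template gains a $7
placeholder for hive.log4j.file, so the trailing extra-properties block
(e.g. the Sentry properties) shifts to $8. strings::Substitute binds
arguments to placeholders strictly by index, as in this illustrative
snippet (the names are stand-ins, not from the patch):

    #include <string>
    #include "kudu/gutil/strings/substitute.h"

    // $0/$1 here play the role of the template's $7/$8: the new argument
    // lands in the new placeholder, and the old trailing one shifts down.
    std::string FillHiveSiteSketch(const std::string& data_root) {
      return strings::Substitute(
          "hive.log4j.file=$0\n$1",
          data_root + "/hive-log4j2.properties",
          "<!-- extra properties, e.g. Sentry -->");
    }
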
@@ -463,9 +480,7 @@ Status MiniHms::CreateCoreSite() const {
 Status MiniHms::CreateLogConfig() const {
   // Configure the HMS to output ERROR messages to the stderr console, and INFO
   // and above to hms.log in the data root. The console messages have a special
-  // 'HMS' tag included to disambiguate them from other Java component logs. The
-  // HMS automatically looks for a logging configuration named
-  // 'hive-log4j2.properties' in the configured HIVE_CONF_DIR.
+  // 'HMS' tag included to disambiguate them from other Java component logs.
   static const string kFileTemplate = R"(
 appender.console.type = Console
 appender.console.name = console
diff --git a/src/kudu/hms/mini_hms.h b/src/kudu/hms/mini_hms.h
index 73ac2df..af30bdc 100644
--- a/src/kudu/hms/mini_hms.h
+++ b/src/kudu/hms/mini_hms.h
@@ -130,6 +130,9 @@ class MiniHms {
 
   // Whether to enable the Kudu listener plugin.
   bool enable_kudu_plugin_ = true;
+
+  // Whether the Hive metastore schema has been initialized.
+  bool schema_initialized_ = false;
 };
 
 } // namespace hms
diff --git a/src/kudu/util/subprocess.cc b/src/kudu/util/subprocess.cc
index f992ba4..2a3e8fd 100644
--- a/src/kudu/util/subprocess.cc
+++ b/src/kudu/util/subprocess.cc
@@ -711,7 +711,8 @@ Status Subprocess::Call(const string& arg_str) {
 Status Subprocess::Call(const vector<string>& argv,
                         const string& stdin_in,
                         string* stdout_out,
-                        string* stderr_out) {
+                        string* stderr_out,
+                        map<string, string> env_vars) {
   Subprocess p(argv);
 
   if (stdout_out) {
@@ -720,6 +721,11 @@ Status Subprocess::Call(const vector<string>& argv,
   if (stderr_out) {
     p.ShareParentStderr(false);
   }
+
+  if (!env_vars.empty()) {
+    p.SetEnvVars(std::move(env_vars));
+  }
+
   RETURN_NOT_OK_PREPEND(p.Start(),
                         "Unable to fork " + argv[0]);
 
diff --git a/src/kudu/util/subprocess.h b/src/kudu/util/subprocess.h
index 4d33c8f..4088c2e 100644
--- a/src/kudu/util/subprocess.h
+++ b/src/kudu/util/subprocess.h
@@ -143,10 +143,15 @@ class Subprocess {
   //
   // Also collects the output from the child process stdout and stderr into
   // 'stdout_out' and 'stderr_out' respectively.
+  //
+  // Optionally allows a passed map of environment variables to be set
+  // on the subprocess via `env_vars`.
   static Status Call(const std::vector<std::string>& argv,
                      const std::string& stdin_in = "",
                      std::string* stdout_out = nullptr,
-                     std::string* stderr_out = nullptr) WARN_UNUSED_RESULT;
+                     std::string* stderr_out = nullptr,
+                     std::map<std::string, std::string> env_vars = {})
+                     WARN_UNUSED_RESULT;
 
   // Return the pipe fd to the child's standard stream.
   // Stream should not be disabled or shared.
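
A minimal usage sketch of the extended Call() overload (the helper name and
paths are illustrative, not part of the patch), mirroring how MiniHms::Start
now invokes the schematool with a custom environment:

    #include <map>
    #include <string>
    #include <utility>

    #include "kudu/util/status.h"
    #include "kudu/util/subprocess.h"

    // Runs Hive's schematool with an explicit set of environment variables,
    // discarding the child's stdout/stderr.
    kudu::Status InitMetastoreSchema(const std::string& hive_home,
                                     std::map<std::string, std::string> env) {
      return kudu::Subprocess::Call(
          { hive_home + "/bin/schematool", "-dbType", "derby", "-initSchema" },
          /*stdin_in=*/"",
          /*stdout_out=*/nullptr,
          /*stderr_out=*/nullptr,
          std::move(env));
    }
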
diff --git a/thirdparty/package-hadoop.sh b/thirdparty/package-hadoop.sh
index ea5ec33..452bd1a 100755
--- a/thirdparty/package-hadoop.sh
+++ b/thirdparty/package-hadoop.sh
@@ -76,6 +76,7 @@ fi
 DIRS="client"
 DIRS="$DIRS common/jdiff"
 DIRS="$DIRS common/sources"
+DIRS="$DIRS common/webapps"
 DIRS="$DIRS hdfs/sources"
 DIRS="$DIRS httpfs"
 DIRS="$DIRS kms"