You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2019/08/08 20:57:35 UTC
[kudu] branch master updated: Prepare for upgrading to Hive 3
This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 76b80ec Prepare for upgrading to Hive 3
76b80ec is described below
commit 76b80ec01e152f1f9b54ac5a959d7f0cf5ac9f93
Author: Grant Henke <gr...@apache.org>
AuthorDate: Tue Aug 6 08:18:05 2019 -0500
Prepare for upgrading to Hive 3
This patch contains build and test changes that work on both Hive 2 and
Hive 3 to minimize the changes required when upgrading to Hive 3.
- Uses the Hive `schematool` to initialize the derby database in the
MiniHms. This fixes issues with autoCreate and is more representative of a
production environment.
- Adjust logging configuration in the MiniHms to be more explicit.
- Workaround HADOOP-8719 by hardcoding the HADOOP_OS_TYPE.
- Workaround HIVE-21614 by using `LIKE` instead of `=` when filtering
tables.
Change-Id: If43ae2330b3d99374c68bae313a3f8bc070f9c69
Reviewed-on: http://gerrit.cloudera.org:8080/14018
Reviewed-by: Hao Hao <ha...@cloudera.com>
Tested-by: Kudu Jenkins
---
src/kudu/hms/CMakeLists.txt | 5 +++-
src/kudu/hms/hms_catalog.cc | 3 ++-
src/kudu/hms/hms_client-test.cc | 3 ++-
src/kudu/hms/mini_hms.cc | 51 ++++++++++++++++++++++++++---------------
src/kudu/hms/mini_hms.h | 3 +++
src/kudu/util/subprocess.cc | 8 ++++++-
src/kudu/util/subprocess.h | 7 +++++-
thirdparty/package-hadoop.sh | 1 +
8 files changed, 58 insertions(+), 23 deletions(-)
diff --git a/src/kudu/hms/CMakeLists.txt b/src/kudu/hms/CMakeLists.txt
index 287307d..d23ff4b 100644
--- a/src/kudu/hms/CMakeLists.txt
+++ b/src/kudu/hms/CMakeLists.txt
@@ -51,6 +51,8 @@ target_link_libraries(kudu_hms ${HMS_DEPS})
# mini_hms
##############################
+# Link the home directories, so that they can be found via
+# test_util::FindHomeDir in MiniHms::Start.
execute_process(COMMAND ln -nsf
"${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hive"
"${EXECUTABLE_OUTPUT_PATH}/hive-home")
@@ -63,7 +65,8 @@ execute_process(COMMAND ln -nsf
file(GLOB DEPENDENCY_JARS
"${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hive/lib/*"
- "${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hadoop/share/hadoop/common/*")
+ "${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hadoop/share/hadoop/common/*"
+ "${CMAKE_SOURCE_DIR}/thirdparty/installed/common/opt/hadoop/share/hadoop/common/lib/*")
add_jar(hms-plugin
"${CMAKE_SOURCE_DIR}/java/kudu-hive/src/main/java/org/apache/kudu/hive/metastore/KuduMetastorePlugin.java"
diff --git a/src/kudu/hms/hms_catalog.cc b/src/kudu/hms/hms_catalog.cc
index 686bcdd..197bf99 100644
--- a/src/kudu/hms/hms_catalog.cc
+++ b/src/kudu/hms/hms_catalog.cc
@@ -230,9 +230,10 @@ Status HmsCatalog::GetKuduTables(vector<hive::Table>* kudu_tables) {
for (const auto& database_name : database_names) {
table_names.clear();
tables.clear();
+ // NOTE: LIKE filters are used instead of = filters due to HIVE-21614
RETURN_NOT_OK(client->GetTableNames(
database_name,
- Substitute("$0$1 = \"$2\" OR $0$1 = \"$3\"",
+ Substitute("$0$1 LIKE \"$2\" OR $0$1 LIKE \"$3\"",
HmsClient::kHiveFilterFieldParams,
HmsClient::kStorageHandlerKey,
HmsClient::kKuduStorageHandler,
diff --git a/src/kudu/hms/hms_client-test.cc b/src/kudu/hms/hms_client-test.cc
index 6fbc121..5375b69 100644
--- a/src/kudu/hms/hms_client-test.cc
+++ b/src/kudu/hms/hms_client-test.cc
@@ -223,9 +223,10 @@ TEST_P(HmsClientTest, TestHmsOperations) {
<< "table names: " << table_names;
// Get filtered table names.
+ // NOTE: LIKE filters are used instead of = filters due to HIVE-21614
table_names.clear();
string filter = Substitute(
- "$0$1 = \"$2\"", HmsClient::kHiveFilterFieldParams,
+ "$0$1 LIKE \"$2\"", HmsClient::kHiveFilterFieldParams,
HmsClient::kStorageHandlerKey, HmsClient::kKuduStorageHandler);
ASSERT_OK(client.GetTableNames(database_name, filter, &table_names));
std::sort(table_names.begin(), table_names.end());
diff --git a/src/kudu/hms/mini_hms.cc b/src/kudu/hms/mini_hms.cc
index 1400f60..98db795 100644
--- a/src/kudu/hms/mini_hms.cc
+++ b/src/kudu/hms/mini_hms.cc
@@ -149,12 +149,25 @@ Status MiniHms::Start() {
map<string, string> env_vars {
{ "JAVA_HOME", java_home },
{ "HADOOP_HOME", hadoop_home },
+ { "HADOOP_CONF_DIR", Substitute("$0/etc/hadoop", hadoop_home) },
{ "HIVE_AUX_JARS_PATH", aux_jars },
{ "HIVE_CONF_DIR", data_root_ },
{ "JAVA_TOOL_OPTIONS", java_options },
{ "HADOOP_CONF_DIR", data_root_ },
+ // Set HADOOP_OS_TYPE=Linux due to HADOOP-8719.
+ // TODO(ghenke): Remove after HADOOP-15966 is available (Hadoop 3.1.3+)
+ { "HADOOP_OS_TYPE", "Linux" }
};
+ if (!schema_initialized_) {
+ // Run the schematool to initialize the database.
+ RETURN_NOT_OK(Subprocess::Call({Substitute("$0/bin/schematool", hive_home),
+ "-dbType", "derby", "-initSchema"}, "",
+ nullptr, nullptr,
+ env_vars));
+ schema_initialized_ = true;
+ }
+
// Start the HMS.
hms_process_.reset(new Subprocess({
Substitute("$0/bin/hive", hive_home),
@@ -215,10 +228,6 @@ Status MiniHms::CreateHiveSite() const {
const string listeners = Substitute("org.apache.hive.hcatalog.listener.DbNotificationListener$0",
enable_kudu_plugin_ ? ",org.apache.kudu.hive.metastore.KuduMetastorePlugin" : "");
- // - datanucleus.schema.autoCreateAll
- // - hive.metastore.schema.verification
- // Allow Hive to startup and run without first running the schemaTool.
- //
// - hive.metastore.event.db.listener.timetolive
// Configures how long the Metastore will store notification log events
// before GCing them.
@@ -239,6 +248,14 @@ Status MiniHms::CreateHiveSite() const {
// Configures the HMS to add the entire thrift Table/Partition
// objects to the HMS notifications.
//
+ // - hive.metastore.event.db.notification.api.auth
+ // Disables the authorization on the DbNotificationListener related
+ // metastore APIs such as get_next_notification. If set to true, then
+ // only the superusers in proxy settings have the permission.
+ //
+ // - hive.log4j.file
+ // Configures the location of the HMS log4j configuration.
+ //
static const string kHiveFileTemplate = R"(
<configuration>
<property>
@@ -249,16 +266,6 @@ Status MiniHms::CreateHiveSite() const {
</property>
<property>
- <name>datanucleus.schema.autoCreateAll</name>
- <value>true</value>
- </property>
-
- <property>
- <name>hive.metastore.schema.verification</name>
- <value>false</value>
- </property>
-
- <property>
<name>hive.metastore.warehouse.dir</name>
<value>file://$2/warehouse/</value>
</property>
@@ -308,8 +315,17 @@ Status MiniHms::CreateHiveSite() const {
<value>true</value>
</property>
- $7
+ <property>
+ <name>hive.metastore.event.db.notification.api.auth</name>
+ <value>false</value>
+ </property>
+
+ <property>
+ <name>hive.log4j.file</name>
+ <value>$7</value>
+ </property>
+ $8
</configuration>
)";
@@ -367,6 +383,7 @@ Status MiniHms::CreateHiveSite() const {
keytab_file_,
service_principal_,
SaslProtection::name_of(protection_),
+ JoinPathSegments(data_root_, "hive-log4j2.properties"),
sentry_properties);
if (IsAuthorizationEnabled()) {
@@ -463,9 +480,7 @@ Status MiniHms::CreateCoreSite() const {
Status MiniHms::CreateLogConfig() const {
// Configure the HMS to output ERROR messages to the stderr console, and INFO
// and above to hms.log in the data root. The console messages have a special
- // 'HMS' tag included to disambiguate them from other Java component logs. The
- // HMS automatically looks for a logging configuration named
- // 'hive-log4j2.properties' in the configured HIVE_CONF_DIR.
+ // 'HMS' tag included to disambiguate them from other Java component logs.
static const string kFileTemplate = R"(
appender.console.type = Console
appender.console.name = console
diff --git a/src/kudu/hms/mini_hms.h b/src/kudu/hms/mini_hms.h
index 73ac2df..af30bdc 100644
--- a/src/kudu/hms/mini_hms.h
+++ b/src/kudu/hms/mini_hms.h
@@ -130,6 +130,9 @@ class MiniHms {
// Whether to enable the Kudu listener plugin.
bool enable_kudu_plugin_ = true;
+
+ // Whether the Hive metastore schema has been initialized.
+ bool schema_initialized_ = false;
};
} // namespace hms
diff --git a/src/kudu/util/subprocess.cc b/src/kudu/util/subprocess.cc
index f992ba4..2a3e8fd 100644
--- a/src/kudu/util/subprocess.cc
+++ b/src/kudu/util/subprocess.cc
@@ -711,7 +711,8 @@ Status Subprocess::Call(const string& arg_str) {
Status Subprocess::Call(const vector<string>& argv,
const string& stdin_in,
string* stdout_out,
- string* stderr_out) {
+ string* stderr_out,
+ map<string, string> env_vars) {
Subprocess p(argv);
if (stdout_out) {
@@ -720,6 +721,11 @@ Status Subprocess::Call(const vector<string>& argv,
if (stderr_out) {
p.ShareParentStderr(false);
}
+
+ if (!env_vars.empty()) {
+ p.SetEnvVars(std::move(env_vars));
+ }
+
RETURN_NOT_OK_PREPEND(p.Start(),
"Unable to fork " + argv[0]);
diff --git a/src/kudu/util/subprocess.h b/src/kudu/util/subprocess.h
index 4d33c8f..4088c2e 100644
--- a/src/kudu/util/subprocess.h
+++ b/src/kudu/util/subprocess.h
@@ -143,10 +143,15 @@ class Subprocess {
//
// Also collects the output from the child process stdout and stderr into
// 'stdout_out' and 'stderr_out' respectively.
+ //
+ // Optionally allows a passed map of environment variables to be set
+ // on the subprocess via `env_vars`.
static Status Call(const std::vector<std::string>& argv,
const std::string& stdin_in = "",
std::string* stdout_out = nullptr,
- std::string* stderr_out = nullptr) WARN_UNUSED_RESULT;
+ std::string* stderr_out = nullptr,
+ std::map<std::string, std::string> env_vars = {})
+ WARN_UNUSED_RESULT;
// Return the pipe fd to the child's standard stream.
// Stream should not be disabled or shared.
diff --git a/thirdparty/package-hadoop.sh b/thirdparty/package-hadoop.sh
index ea5ec33..452bd1a 100755
--- a/thirdparty/package-hadoop.sh
+++ b/thirdparty/package-hadoop.sh
@@ -76,6 +76,7 @@ fi
DIRS="client"
DIRS="$DIRS common/jdiff"
DIRS="$DIRS common/sources"
+DIRS="$DIRS common/webapps"
DIRS="$DIRS hdfs/sources"
DIRS="$DIRS httpfs"
DIRS="$DIRS kms"