You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/02/08 17:44:25 UTC
[impala] 03/04: IMPALA-9361: manually configured kerberized minicluster
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 6f150d383c64d9adb8e11c73c5bb18d3f5b6ff97
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Mon Feb 3 14:51:25 2020 -0800
IMPALA-9361: manually configured kerberized minicluster
The kerberized minicluster is enabled by setting
IMPALA_KERBERIZE=true in impala-config-*.sh.
After setting it you must run ./bin/create-test-configuration.sh
then restart minicluster.
This adds a script to partially automate setup of a local KDC,
in lieu of the unmaintained minikdc support (which has been ripped
out).
Testing:
I was able to run some queries against pre-created HDFS tables
with kerberos enabled.
Change-Id: Ib34101d132e9c9d59da14537edf7d096f25e9bee
Reviewed-on: http://gerrit.cloudera.org:8080/15159
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
bin/bootstrap_toolchain.py | 10 -
bin/create-test-configuration.sh | 14 +-
bin/impala-config.sh | 8 +-
bin/kerberos/README-kerberos.md | 21 ++
bin/kerberos/experimental-kerberos-setup.sh | 115 +++++++++++
bin/rat_exclude_files.txt | 1 +
bin/start-impala-cluster.py | 1 +
buildall.sh | 20 +-
fe/src/test/resources/hive-site.xml.py | 2 +-
testdata/bin/minikdc.sh | 218 ---------------------
testdata/bin/minikdc_env.sh | 25 ++-
testdata/bin/run-all.sh | 17 +-
testdata/bin/run-hbase.sh | 2 +-
testdata/cluster/admin | 31 +--
.../common/etc/hadoop/conf/core-site.xml.py | 2 +-
.../common/etc/hadoop/conf/yarn-site.xml.py | 2 +-
16 files changed, 188 insertions(+), 301 deletions(-)
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index d6a58ee..546f388 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -629,15 +629,6 @@ def get_hadoop_downloads():
# Ranger is always CDP
cluster_components.append(CdpComponent("ranger",
archive_basename_tmpl="ranger-${version}-admin"))
-
- # llama-minikdc is used for testing and not something that Impala needs to be built
- # against. It does not get updated very frequently unlike the other CDH components.
- # It is stored in a special location compared to other components.
- toolchain_host = os.environ["IMPALA_TOOLCHAIN_HOST"]
- download_path_prefix = "https://{0}/build/cdh_components/".format(toolchain_host)
- destination_basedir = os.environ["CDH_COMPONENTS_HOME"]
- cluster_components += [EnvVersionedPackage("llama-minikdc", download_path_prefix,
- destination_basedir)]
return cluster_components
@@ -686,7 +677,6 @@ def main():
If false, most components get the CDH versions based on the $CDH_BUILD_NUMBER.
The exceptions are:
- sentry (always downloaded from $IMPALA_TOOLCHAIN_HOST for a given $CDH_BUILD_NUMBER)
- - llama-minikdc (always downloaded from $TOOLCHAIN_HOST)
- ranger (always downloaded from $IMPALA_TOOLCHAIN_HOST for a given $CDP_BUILD_NUMBER)
- kudu (currently always downloaded from $IMPALA_TOOLCHAIN_HOST for a given
$CDH_BUILD_NUMBER)
diff --git a/bin/create-test-configuration.sh b/bin/create-test-configuration.sh
index d1f5dc8..32d56c3 100755
--- a/bin/create-test-configuration.sh
+++ b/bin/create-test-configuration.sh
@@ -46,7 +46,7 @@ function generate_config {
perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
"${GCIN}" > "${GCOUT}.tmp"
- if [ "${IMPALA_KERBERIZE}" = "" ]; then
+ if [[ "${IMPALA_KERBERIZE}" != "true" ]]; then
sed '/<!-- BEGIN Kerberos/,/END Kerberos settings -->/d' \
"${GCOUT}.tmp" > "${GCOUT}"
else
@@ -67,7 +67,6 @@ function generate_config {
CREATE_METASTORE=0
CREATE_SENTRY_POLICY_DB=0
CREATE_RANGER_POLICY_DB=0
-: ${IMPALA_KERBERIZE=}
# parse command line options
for ARG in $*
@@ -82,15 +81,10 @@ do
-create_ranger_policy_db)
CREATE_RANGER_POLICY_DB=1
;;
- -k|-kerberize|-kerberos|-kerb)
- # This could also come in through the environment...
- export IMPALA_KERBERIZE=1
- ;;
-help|*)
echo "[-create_metastore] : If true, creates a new metastore."
echo "[-create_sentry_policy_db] : If true, creates a new sentry policy db."
echo "[-create_ranger_policy_db] : If true, creates a new Ranger policy db."
- echo "[-kerberize] : Enable kerberos on the cluster"
exit 1
;;
esac
@@ -104,7 +98,7 @@ fi
${CLUSTER_DIR}/admin create_cluster
-if [ ! -z "${IMPALA_KERBERIZE}" ]; then
+if [[ "${IMPALA_KERBERIZE}" = "true" ]]; then
# Sanity check...
if ! ${CLUSTER_DIR}/admin is_kerberized; then
echo "Kerberized cluster not created, even though told to."
@@ -192,7 +186,7 @@ for file in core-site.xml hdfs-site.xml yarn-site.xml ; do
ln -s ${CLUSTER_HADOOP_CONF_DIR}/$file
done
-if ${CLUSTER_DIR}/admin is_kerberized; then
+if [[ "${IMPALA_KERBERIZE}" = "true" ]]; then
# KERBEROS TODO: Without this, the yarn daemons can see these
# files, but mapreduce jobs *cannot* see these files. This seems
# strange, but making these symlinks also results in data loading
@@ -214,7 +208,7 @@ for SENTRY_VARIANT in oo oo_nogrant no_oo ; do
sentry-site_${SENTRY_VARIANT}.xml
done
-if [ ! -z "${IMPALA_KERBERIZE}" ]; then
+if [[ "${IMPALA_KERBERIZE}" = "true" ]]; then
generate_config hbase-jaas-server.conf.template hbase-jaas-server.conf
generate_config hbase-jaas-client.conf.template hbase-jaas-client.conf
fi
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 1ad556f..c21088e 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -186,7 +186,6 @@ export CDP_KNOX_VERSION=1.3.0.7.0.2.0-212
export IMPALA_PARQUET_VERSION=1.10.99-cdh6.x-SNAPSHOT
export IMPALA_AVRO_JAVA_VERSION=1.8.2-cdh6.x-SNAPSHOT
-export IMPALA_LLAMA_MINIKDC_VERSION=1.0.0
export IMPALA_KITE_VERSION=1.0.0-cdh6.x-SNAPSHOT
export IMPALA_KUDU_JAVA_VERSION=1.11.0-cdh6.x-SNAPSHOT
export IMPALA_ORC_JAVA_VERSION=1.6.2
@@ -199,7 +198,8 @@ unset IMPALA_HIVE_URL
unset IMPALA_KUDU_URL
unset IMPALA_KUDU_VERSION
unset IMPALA_SENTRY_URL
-unset IMPALA_LLAMA_MINIKDC_URL
+
+export IMPALA_KERBERIZE=false
# Source the branch and local config override files here to override any
# variables above or any variables below that allow overriding via environment
@@ -603,7 +603,6 @@ HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${HADOOP_HOME}/share/hadoop/tools/lib/*"
export MINI_DFS_BASE_DATA_DIR="$IMPALA_HOME/cdh-${CDH_MAJOR_VERSION}-hdfs-data"
export PATH="$HADOOP_HOME/bin:$PATH"
-export MINIKDC_HOME="$CDH_COMPONENTS_HOME/llama-minikdc-${IMPALA_LLAMA_MINIKDC_VERSION}"
export SENTRY_HOME="$CDH_COMPONENTS_HOME/sentry-${IMPALA_SENTRY_VERSION}"
export SENTRY_CONF_DIR="$IMPALA_HOME/fe/src/test/resources"
@@ -796,7 +795,6 @@ echo "SENTRY_HOME = $SENTRY_HOME"
echo "SENTRY_CONF_DIR = $SENTRY_CONF_DIR"
echo "RANGER_HOME = $RANGER_HOME"
echo "RANGER_CONF_DIR = $RANGER_CONF_DIR "
-echo "MINIKDC_HOME = $MINIKDC_HOME"
echo "THRIFT_HOME = $THRIFT_HOME"
echo "HADOOP_LZO = $HADOOP_LZO"
echo "IMPALA_LZO = $IMPALA_LZO"
@@ -834,7 +832,7 @@ if "${CLUSTER_DIR}/admin" is_kerberized; then
echo " *** This cluster is kerberized ***"
echo "KRB5_KTNAME = $KRB5_KTNAME"
echo "KRB5_CONFIG = $KRB5_CONFIG"
- echo "KRB5_TRACE = $KRB5_TRACE"
+ echo "KRB5_TRACE = ${KRB5_TRACE:-}"
echo "HADOOP_OPTS = $HADOOP_OPTS"
echo " *** This cluster is kerberized ***"
else
diff --git a/bin/kerberos/README-kerberos.md b/bin/kerberos/README-kerberos.md
new file mode 100644
index 0000000..7730a55
--- /dev/null
+++ b/bin/kerberos/README-kerberos.md
@@ -0,0 +1,21 @@
+# Kerberized Minicluster Setup
+The kerberized minicluster is enabled by setting IMPALA_KERBERIZE=true in
+impala-config-*.sh.
+
+After setting it you must run ./bin/create-test-configuration.sh then
+restart the minicluster (e.g. with testdata/bin/run-all.sh).
+
+The Kerberized minicluster requires a KDC to be set up and configured
+and service users to be added to the keytab at $KRB5_KTNAME.
+This step is not fully automated: experimental-kerberos-setup.sh automates
+some of the setup, but it is experimental at this point.
+
+# Limitations
+Not all minicluster services actually work or are kerberized at this point.
+I was able to run queries against pre-existing HMS tables stored in HDFS.
+
+Kerberos is finicky about hostnames - you may need to tweak your /etc/hosts
+to get hosts to authenticate if kerberos thinks that hostnames are distinct.
+
+Killing minicluster services seems to be broken when using a kerberized
+minicluster. You may need to manually kill the services.
diff --git a/bin/kerberos/experimental-kerberos-setup.sh b/bin/kerberos/experimental-kerberos-setup.sh
new file mode 100755
index 0000000..633eaf7
--- /dev/null
+++ b/bin/kerberos/experimental-kerberos-setup.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# The script automates some of the steps required to set up Kerberos servers and
+# keytabs for an Impala minicluster running on Ubuntu.
+#
+# It installs the kerberos clients and servers, creates principals for the user and
+# services and generates a keytab with all of the required users.
+#
+# This script is tested only on Ubuntu 18.04.
+#
+# References:
+# * https://linuxconfig.org/how-to-install-kerberos-kdc-server-and-client-on-ubuntu-18-04
+set -euo pipefail
+
+# Source impala-config.sh to get config variables, including those set by
+# testdata/bin/minikdc_env.sh.
+DIR=$(dirname "$0")
+. "${DIR}/../impala-config.sh"
+
+if [[ "$IMPALA_KERBERIZE" != "true" ]]; then
+ echo "IMPALA_KERBERIZE must be true, but was: $IMPALA_KERBERIZE"
+ exit 1
+fi
+
+echo "Installing required packages. Sudo password may be required."
+sudo apt install -y krb5-kdc krb5-admin-server krb5-config krb5-user
+
+export KRB5CCNAME=/tmp/krb5cc_${USER}_dev
+
+
+# IN /etc/krb5.conf
+# default_realm = EXAMPLE.COM
+echo "Please modify $KRB5_CONFIG to set default_realm to $MINIKDC_REALM."
+echo "Also add $MINIKDC_REALM to the [realms] section with kdc and admin_server set, e.g."
+echo "
+[realms]
+ EXAMPLE.COM = {
+ kdc = $HOSTNAME
+ admin_server = $HOSTNAME
+ }
+"
+
+read -p "Press enter to continue"
+
+# Create kerberos database for realm if not present.
+if sudo kadmin.local -q "list_principals"; then
+ echo "Using existing realm"
+else
+ echo "Creating new Kerberos database for realm"
+ sudo krb5_newrealm
+fi
+
+echo "Please add or uncomment this line in /etc/krb5kdc/kadm5.acl:"
+echo " */admin *"
+read -p "Press enter to continue"
+
+sudo service krb5-admin-server restart
+sudo service krb5-kdc restart
+
+# Adds a principal if not present, and adds its key to the keytab.
+# This will prompt for a password.
+add_principal() {
+ local princ=$1
+ if ! sudo kadmin.local -q "get_principal $princ" | grep -F "Principal: $princ"
+ then
+ echo "Principal $princ does not exist, creating"
+ sudo kadmin.local -q "add_principal $princ"
+ fi
+ sudo kadmin.local -q "ktadd -k "$KRB5_KTNAME" $princ"
+}
+
+# Adds a service principal if not present, and adds its key to the keytab.
+# This will generate a random key and not prompt for a password.
+add_service_principal() {
+ local princ=$1
+ if ! sudo kadmin.local -q "get_principal $princ" | grep -F "Principal: $princ"
+ then
+ echo "Principal $princ does not exist, creating"
+ sudo kadmin.local -q "add_principal -randkey $princ"
+ fi
+ sudo kadmin.local -q "ktadd -k "$KRB5_KTNAME" $princ"
+}
+
+# Create an admin user.
+add_principal $USER/admin@$MINIKDC_REALM
+
+# Add service principals.
+for svc in $USER hdfs mapred yarn HTTP hive hbase zookeeper impala impala-be
+do
+ add_service_principal $svc/localhost@$MINIKDC_REALM
+done
+
+# Kinit as the regular user.
+sudo chown $USER $KRB5_KTNAME
+kinit -kt $KRB5_KTNAME $USER/localhost@$MINIKDC_REALM
+
+echo "Keytab contents:"
+klist -kt $KRB5_KTNAME
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index c92142b..a129959 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -97,6 +97,7 @@ be/src/thirdparty/pcg-cpp-0.98/README.md
lib/python/README.md
lib/python/impala_py_lib/gdb/README.md
shell/packaging/README.md
+bin/kerberos/README-kerberos.md
# http://www.apache.org/legal/src-headers.html: "Test data for which the addition of a
# source header would cause the tests to fail."
diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py
index d1459ad..4f1269a 100755
--- a/bin/start-impala-cluster.py
+++ b/bin/start-impala-cluster.py
@@ -400,6 +400,7 @@ def build_kerberos_args(daemon):
args.append("-principal={0}".format(os.getenv("MINIKDC_PRINC_IMPALA_BE")))
if os.getenv("MINIKDC_DEBUG", "") == "true":
args.append("-krb5_debug_file=/tmp/{0}.krb5_debug".format(daemon))
+ return args
def compute_impalad_mem_limit(cluster_size):
diff --git a/buildall.sh b/buildall.sh
index ab7d415..08ff4bf 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -58,7 +58,6 @@ FORMAT_SENTRY_POLICY_DB=0
FORMAT_RANGER_POLICY_DB=0
NEED_MINICLUSTER=0
START_IMPALA_CLUSTER=0
-IMPALA_KERBERIZE=0
SNAPSHOT_FILE=
METASTORE_SNAPSHOT_FILE=
CODE_COVERAGE=0
@@ -178,13 +177,6 @@ do
-start_impala_cluster)
START_IMPALA_CLUSTER=1
;;
- -k|-kerberize|-kerberos|-kerb)
- # Export to the environment for all child process tools
- export IMPALA_KERBERIZE=1
- set +u
- . ${MINIKDC_ENV}
- set -u
- ;;
-v|-debug)
echo "Running in Debug mode"
set -x
@@ -236,7 +228,6 @@ do
echo "[-snapshot_file <file name>] : Load test data from a snapshot file"
echo "[-metastore_snapshot_file <file_name>]: Load the hive metastore snapshot"
echo "[-so|-build_shared_libs] : Dynamically link executables (default is static)"
- echo "[-kerberize] : Enable kerberos on the cluster"
echo "[-fe_only] : Build just the frontend"
echo "[-ninja] : Use ninja instead of make"
echo "[-cmake_only] : Generate makefiles only, instead of doing a full build"
@@ -318,11 +309,13 @@ fi
# If we aren't kerberized then we certainly don't need to talk about
# re-sourcing impala-config.
-if [[ ${IMPALA_KERBERIZE} -eq 0 ]]; then
+if [[ ${IMPALA_KERBERIZE} = "true" ]]; then
+ . ${MINIKDC_ENV}
+else
NEEDS_RE_SOURCE_NOTE=0
fi
-if [[ ${IMPALA_KERBERIZE} -eq 1 &&
+if [[ ${IMPALA_KERBERIZE} = "true" &&
(${TESTDATA_ACTION} -eq 1 || ${TESTS_ACTION} -eq 1) ]]; then
echo "Running tests or loading test data is not supported for kerberized clusters."
echo "Please remove the -testdata flag and/or add the -skiptests flag."
@@ -464,11 +457,6 @@ reconfigure_test_cluster() {
"${IMPALA_HOME}/testdata/bin/kill-all.sh" || true
fi
- # Stop the minikdc if needed.
- if "${CLUSTER_DIR}/admin" is_kerberized; then
- "${IMPALA_HOME}/testdata/bin/minikdc.sh" stop
- fi
-
local CREATE_TEST_CONFIG_ARGS=""
if [[ "$FORMAT_SENTRY_POLICY_DB" -eq 1 ]]; then
CREATE_TEST_CONFIG_ARGS+=" -create_sentry_policy_db"
diff --git a/fe/src/test/resources/hive-site.xml.py b/fe/src/test/resources/hive-site.xml.py
index 4479d61..1f891fe 100644
--- a/fe/src/test/resources/hive-site.xml.py
+++ b/fe/src/test/resources/hive-site.xml.py
@@ -20,7 +20,7 @@
import os
hive_major_version = int(os.environ['IMPALA_HIVE_VERSION'][0])
-kerberize = os.environ.get('IMPALA_KERBERIZE') == '1'
+kerberize = os.environ.get('IMPALA_KERBERIZE') == 'true'
variant = os.environ.get('HIVE_VARIANT')
CONFIG = {
diff --git a/testdata/bin/minikdc.sh b/testdata/bin/minikdc.sh
deleted file mode 100755
index a3027cb..0000000
--- a/testdata/bin/minikdc.sh
+++ /dev/null
@@ -1,218 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-#
-# Start, stop, and related actions upon a "MiniKdc". This behaves as
-# a normal KDC, but is self-contained and we control the principals.
-#
-
-# Exits with failure, printing its arguments
-die() {
- echo "$0 ERROR: $@"
- exit 1
-}
-
-# Initial sanity checks
-initial_checks() {
- if [ "${MINIKDC_HOME}" = "" ]; then
- die "MINIKDC_HOME environment variable not set"
- fi
-
- if [ ! -x "${MINIKDC_BIN}" ]; then
- die "${MINIKDC_BIN} absent or not executable"
- fi
-}
-
-# Create a one-shot directory to hold minikdc files. Cleaned up in cleanup().
-make_working_dir() {
- if [ ! -d ${MINIKDC_SCRATCH_ROOT} ]; then
- mkdir -p ${MINIKDC_SCRATCH_ROOT}
- fi
- MINIKDC_TMP=`mktemp -d --tmpdir=${MINIKDC_SCRATCH_ROOT} minikdc.XXXXXXXXXX`
- if [ ! -d ${MINIKDC_TMP} ]; then
- die "Failure creating working directory"
- fi
- ln -s `basename ${MINIKDC_TMP}` ${MINIKDC_WD}
-}
-
-# Remove directory above. Called as a part of do_start() so the working
-# directory will exist until the next instance of the minikdc is started.
-cleanup() {
- rm -f ${MINIKDC_WD}
- rm -rf ${MINIKDC_SCRATCH_ROOT}/minikdc.*
-}
-
-# Echos the minikdc working directory
-get_working_dir() {
- if [ -d "${MINIKDC_WD}" ]; then
- echo "${MINIKDC_WD}"
- return 0
- else
- return 1
- fi
-}
-
-# Is the minikdc running?
-minikdc_running() {
- [ -d "${MINIKDC_WD}" ] || return 1
- pgrep -f llama-minikdc > /dev/null 2>&1
- return $?
-}
-
-# The MiniKdc reads from this properties file.
-write_properties() {
- cat > ${MINIKDC_PROPS} <<EOF
-org.name = ${MINIKDC_ORG}
-org.domain = ${MINIKDC_DOMAIN}
-kdc.port = ${MINIKDC_PORT}
-debug = ${MINIKDC_DEBUG}
-EOF
-}
-
-# Calls the "real" minikdc startup shell script from llama-minikdc; uses
-# nohup to detach it from the terminal.
-start_minikdc() {
- # Grabs all the principals and mutilates them into one desired string
- MINIKDC_PRINCIPALS=`env | grep MINIKDC_PRINC_ \
- | sed "s/@${MINIKDC_REALM}//g" \
- | awk -F= '{print $2}' \
- | tr '\n' ' '`
-
- if [ "${MINIKDC_DEBUG}" = "true" ]; then
- export MINIKDC_OPTS=-Dsun.security.krb5.debug=true
- fi
-
- nohup ${MINIKDC_BIN} ${MINIKDC_WD} ${MINIKDC_PROPS} ${MINIKDC_KEYTAB} \
- ${MINIKDC_PRINCIPALS} > ${MINIKDC_LOG} 2>&1 &
- sleep 1
- # Ought to be running instantaneously
- if ! minikdc_running; then
- die "MiniKdc failed to start"
- fi
-
- # But it takes a little while to become available
- TRIES=15
- STARTUPDONE=0
- while [ ${TRIES} -gt 0 ]; do
- if grep -q "^Standalone MiniKdc Running" ${MINIKDC_LOG}; then
- STARTUPDONE=1
- break
- fi
- printf .
- sleep 1
- TRIES=`expr ${TRIES} - 1`
- done
-
- if [ ${STARTUPDONE} -eq 0 ]; then
- do_stop
- die "MiniKdc failed to become available"
- fi
-}
-
-# Hunt down and destroy the minikdc. Gently at first, then aggressively.
-kill_minikdc() {
- TRIES=3
- DEAD=0
- while [ ${TRIES} -gt 0 ]; do
- if minikdc_running; then
- pkill -f llama-minikdc
- sleep 1
- else
- DEAD=1
- break;
- fi
- TRIES=`expr ${TRIES} - 1`
- done
-
- if [ ${DEAD} -eq 0 ]; then
- pkill -9 -f llama-minikdc
- sleep 1
- if minikdc_running; then
- die "Failed to kill the minikdc"
- fi
- fi
-}
-
-# Controlling function for 'start' command.
-do_start() {
- if minikdc_running; then
- echo "The minikdc is already running."
- exit 0
- fi
-
- cleanup
- make_working_dir
- write_properties
- start_minikdc
- echo "Minikdc started successfully."
- return 0
-}
-
-# Controlling function for 'stop' command.
-do_stop() {
- if ! minikdc_running; then
- echo "The minikdc is not running."
- return 0
- fi
-
- kill_minikdc
- echo "Minikdc stopped successfully."
-}
-
-#
-# Execution starts here.
-#
-
-if [ ! -f "${MINIKDC_ENV}" ]; then
- die "Can't find MINIKDC_ENV: ${MINIKDC_ENV}"
-fi
-. ${MINIKDC_ENV}
-
-# Interesting MiniKdc configuration:
-MINIKDC_BIN=${MINIKDC_HOME}/bin/minikdc
-MINIKDC_PROPS=${MINIKDC_WD}/properties.conf
-MINIKDC_LOG=${MINIKDC_WD}/minikdc.log
-MINIKDC_PORT=42574
-
-initial_checks
-
-case "$1" in
- start)
- do_start
- RV=$?
- ;;
- stop)
- do_stop
- RV=$?
- ;;
- restart)
- do_stop && do_start
- RV=$?
- ;;
- running|status)
- minikdc_running
- RV=$?
- ;;
- *)
- die "Usage: start|stop|restart|running|status"
- ;;
-esac
-
-exit ${RV}
diff --git a/testdata/bin/minikdc_env.sh b/testdata/bin/minikdc_env.sh
index 614636d..ecd902f 100644
--- a/testdata/bin/minikdc_env.sh
+++ b/testdata/bin/minikdc_env.sh
@@ -15,14 +15,18 @@
# specific language governing permissions and limitations
# under the License.
#
-# This file should be sourced by the minikdc and by impala-config.sh
-# when operating a kerberized development environment.
+# This file should be sourced by impala-config.sh when operating in a
+# kerberized development environment.
#
+# The minikdc name comes from an old KDC implementation that was used as a standalone
+# KDC for Impala minicluster environments. That minikdc support bitrotted so now this
+# only supports a manually configured KDC (e.g. set up by
+# bin/kerberos/experimental-kerberos-setup.sh).
# Twiddle this to turn on/off kerberos debug EVERYWHERE. Then restart
-# all deamons and the minikdc to enable lots of kerberos debug messages.
+# all daemons to enable lots of kerberos debug messages.
# Valid values are true | false
-export MINIKDC_DEBUG=false
+export MINIKDC_DEBUG=true
# MiniKdc realm configuration. Unfortunately, it breaks if realm isn't
# EXAMPLE.COM.
@@ -51,17 +55,17 @@ export MINIKDC_PRINC_ZOOK=zookeeper/localhost@${MINIKDC_REALM}
export MINIKDC_PRINC_IMPALA=impala/localhost@${MINIKDC_REALM}
export MINIKDC_PRINC_IMPALA_BE=impala-be/localhost@${MINIKDC_REALM}
export MINIKDC_PRINC_USER=${USER}/localhost@${MINIKDC_REALM}
-export MINIKDC_PRINC_LLAM=llama/localhost@${MINIKDC_REALM}
# Basic directory setup:
MINIKDC_SCRATCH_ROOT=${MINIKDC_SCRATCH_ROOT-${IMPALA_CLUSTER_LOGS_DIR}}
export MINIKDC_WD=${MINIKDC_SCRATCH_ROOT}/minikdc-workdir
-# The one big keytab created by the minikdc
-export MINIKDC_KEYTAB=${MINIKDC_WD}/keytab
+# The one big keytab that should contain all the service users
+export MINIKDC_KEYTAB=$IMPALA_HOME/impala.keytab
-# The krb5.conf file everyone needs to point at
-export MINIKDC_KRB5CONF=${MINIKDC_WD}/krb5.conf
+# The krb5.conf file that all the services should be using. We just point
+# to the system config file for now.
+export MINIKDC_KRB5CONF=/etc/krb5.conf
# These options tell kerberos related code to emit lots of debug messages
if [ ${MINIKDC_DEBUG} = "true" ]; then
@@ -72,7 +76,8 @@ else
export JAVA_KERBEROS_MAGIC=""
fi
-# Kerberos environment variables to talk to our MiniKdc
+# Kerberos environment variables so other kerberos clients will use our
+# kerberos setup.
export KRB5_KTNAME="${MINIKDC_KEYTAB}"
export KRB5_CONFIG="${MINIKDC_KRB5CONF}"
diff --git a/testdata/bin/run-all.sh b/testdata/bin/run-all.sh
index 3911e38..8bb666a 100755
--- a/testdata/bin/run-all.sh
+++ b/testdata/bin/run-all.sh
@@ -59,12 +59,19 @@ $IMPALA_HOME/testdata/bin/run-mini-dfs.sh ${HDFS_FORMAT_CLUSTER-} 2>&1 | \
# - One Yarn ResourceManager
# - Multiple Yarn NodeManagers, exactly one per HDFS DN
if [[ ${DEFAULT_FS} == "hdfs://${INTERNAL_LISTEN_HOST}:20500" ]]; then
- echo " --> Starting HBase"
- $IMPALA_HOME/testdata/bin/run-hbase.sh 2>&1 | \
- tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hbase.log
+ # HBase does not work with kerberos yet.
+ if [[ "$IMPALA_KERBERIZE" != true ]]; then
+ echo " --> Starting HBase"
+ $IMPALA_HOME/testdata/bin/run-hbase.sh 2>&1 | \
+ tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hbase.log
+ fi
echo " --> Starting Hive Server and Metastore Service"
- $IMPALA_HOME/testdata/bin/run-hive-server.sh 2>&1 | \
+ HIVE_FLAGS=
+ if [[ "$IMPALA_KERBERIZE" = true ]]; then
+ HIVE_FLAGS=" -only_metastore"
+ fi
+ $IMPALA_HOME/testdata/bin/run-hive-server.sh $HIVE_FLAGS 2>&1 | \
tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hive-server.log
echo " --> Starting the Sentry Policy Server"
@@ -97,4 +104,4 @@ fi
echo " --> Starting Ranger Server"
"${IMPALA_HOME}/testdata/bin/run-ranger-server.sh" 2>&1 | \
- tee "${IMPALA_CLUSTER_LOGS_DIR}/run-ranger-server.log"
\ No newline at end of file
+ tee "${IMPALA_CLUSTER_LOGS_DIR}/run-ranger-server.log"
diff --git a/testdata/bin/run-hbase.sh b/testdata/bin/run-hbase.sh
index e7d67c9..39b369e 100755
--- a/testdata/bin/run-hbase.sh
+++ b/testdata/bin/run-hbase.sh
@@ -78,7 +78,7 @@ if ${CLUSTER_DIR}/admin is_kerberized; then
# These ultimately become args to java when it starts up hbase
K1="-Djava.security.krb5.conf=${KRB5_CONFIG}"
- K2="${JAVA_KRB5_DEBUG}"
+ K2="${JAVA_KRB5_DEBUG:-}"
K3="-Djava.security.auth.login.config=${HBASE_JAAS_CLIENT}"
K4="-Djava.security.auth.login.config=${HBASE_JAAS_SERVER}"
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index 8efa387..708599a 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -31,7 +31,6 @@ set -euo pipefail
. $IMPALA_HOME/bin/report_build_error.sh
setup_report_build_error
-: ${IMPALA_KERBERIZE=}
: ${INCLUDE_YARN=}
# For Hive 3, we require Yarn for Tez support.
@@ -39,10 +38,9 @@ if "$USE_CDP_HIVE"; then
INCLUDE_YARN=1
fi
-while getopts vky OPT; do
+while getopts vy OPT; do
case $OPT in
v) set -x;;
- k) export IMPALA_KERBERIZE=1;;
y) export INCLUDE_YARN=1;;
?) echo "Usage: $0 [-v (verbose) -k (kerberize) -y (yarn)] ACTION (see source...)"; exit 1;;
esac
@@ -109,7 +107,6 @@ EMPTY_NODE_DIRS=$(echo data/dfs/{dn,nn} var/{run,lib/hadoop-hdfs,log} \
var/{log,run}/kudu/{master,ts} var/lib/kudu/{master,ts}/{wal,data})
EASY_ACCESS_LOG_DIR="$IMPALA_CLUSTER_LOGS_DIR"
-MINIKDC_INIT=${IMPALA_HOME}/testdata/bin/minikdc.sh
FIND_EXECUTABLE_FILTER="-executable"
if ! find /dev/null "${FIND_EXECUTABLE_FILTER}" 2> /dev/null; then
@@ -127,12 +124,8 @@ fi
# ourselves a fresh TGT (Ticket-Granting-Ticket).
#
function kerberize_setup {
- if is_kerberized; then
- export IMPALA_KERBERIZE=1
- fi
-
# No kerberos? We're done.
- if [ -z "${IMPALA_KERBERIZE}" ]; then
+ if [[ "${IMPALA_KERBERIZE}" != "true" ]]; then
return
fi
@@ -170,9 +163,6 @@ function kerberize_setup {
exit 1
fi
- # Starting it has no effect if it's already started...
- ${MINIKDC_INIT} start
-
# Source the appropriate minikdc environment variables
. ${MINIKDC_ENV}
@@ -190,7 +180,7 @@ function kerberize_setup {
function is_kerberized {
HCONFSC="`get_hadoop_client_conf_dir`/core-site.xml"
if [ -f "${HCONFSC}" ]; then
- if grep -qi "Kerberos settings" "${HCONFSC}"; then
+ if grep -qi "kerberos" "${HCONFSC}"; then
# If the config exists and has kerberos things in it, treat as kerberized
return 0
fi
@@ -216,13 +206,9 @@ function create_cluster {
# Blow away existing config files (so we don't pick up kerberos settings)
rm -f `get_hadoop_client_conf_dir`/*
- if [ ! -z "${IMPALA_KERBERIZE}" ]; then
+ if [[ "${IMPALA_KERBERIZE}" = "true" ]]; then
kerberize_setup
echo "Creating Kerberized cluster."
- else
- # Stop the minikdc in case it was running
- . ${MINIKDC_ENV}
- ${MINIKDC_INIT} stop
fi
echo "Hostname for internal communication: ${INTERNAL_LISTEN_HOST}" \
@@ -295,7 +281,7 @@ function create_cluster {
# Chop out everything between the BEGIN/END Kerberos comments if
# not kerberized
- if [ -z "${IMPALA_KERBERIZE}" ]; then
+ if [[ "${IMPALA_KERBERIZE}" != "true" ]]; then
sed '/<!-- BEGIN Kerberos/,/END Kerberos settings -->/d' \
"$ACTUAL_PATH.1" > "$ACTUAL_PATH"
else
@@ -329,11 +315,10 @@ function start_cluster {
return 1
fi
- if [ ! -z "${IMPALA_KERBERIZE}" ] && ! is_kerberized; then
+ if [[ "${IMPALA_KERBERIZE}" = "true" ]] && ! is_kerberized; then
echo "Kerberized start requested, but the config files aren't set up"
- echo "for kerberos. Destroy the cluster and rebuild it:"
- echo " --> $ ./testdata/cluster/admin delete_cluster"
- echo " --> $ IMPALA_KERBERIZE=1 ./testdata/cluster/admin create_cluster"
+ echo "for kerberos. You must regenerate configurations with "
+ echo " ./bin/create-test-configuration.sh"
exit 1
fi
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
index 6af28f4..3a5faa3 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
@@ -20,7 +20,7 @@
import os
import sys
-kerberize = os.environ.get('IMPALA_KERBERIZE') == '1'
+kerberize = os.environ.get('IMPALA_KERBERIZE') == 'true'
target_filesystem = os.environ.get('TARGET_FILESYSTEM')
compression_codecs = [
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
index 305feb3..769685f 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
@@ -20,7 +20,7 @@
import os
import sys
-kerberize = os.environ.get('IMPALA_KERBERIZE') == '1'
+kerberize = os.environ.get('IMPALA_KERBERIZE') == 'true'
hive_major_version = int(os.environ['IMPALA_HIVE_VERSION'][0])