You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/02/08 17:44:25 UTC

[impala] 03/04: IMPALA-9361: manually configured kerberized minicluster

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 6f150d383c64d9adb8e11c73c5bb18d3f5b6ff97
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Mon Feb 3 14:51:25 2020 -0800

    IMPALA-9361: manually configured kerberized minicluster
    
    The kerberized minicluster is enabled by setting
    IMPALA_KERBERIZE=true in impala-config-*.sh.
    
    After setting it you must run ./bin/create-test-configuration.sh
    then restart minicluster.
    
    This adds a script to partially automate setup of a local KDC,
    in lieu of the unmaintained minikdc support (which has been ripped
    out).
    
    Testing:
    I was able to run some queries against pre-created HDFS tables
    with kerberos enabled.
    
    Change-Id: Ib34101d132e9c9d59da14537edf7d096f25e9bee
    Reviewed-on: http://gerrit.cloudera.org:8080/15159
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/bootstrap_toolchain.py                         |  10 -
 bin/create-test-configuration.sh                   |  14 +-
 bin/impala-config.sh                               |   8 +-
 bin/kerberos/README-kerberos.md                    |  21 ++
 bin/kerberos/experimental-kerberos-setup.sh        | 115 +++++++++++
 bin/rat_exclude_files.txt                          |   1 +
 bin/start-impala-cluster.py                        |   1 +
 buildall.sh                                        |  20 +-
 fe/src/test/resources/hive-site.xml.py             |   2 +-
 testdata/bin/minikdc.sh                            | 218 ---------------------
 testdata/bin/minikdc_env.sh                        |  25 ++-
 testdata/bin/run-all.sh                            |  17 +-
 testdata/bin/run-hbase.sh                          |   2 +-
 testdata/cluster/admin                             |  31 +--
 .../common/etc/hadoop/conf/core-site.xml.py        |   2 +-
 .../common/etc/hadoop/conf/yarn-site.xml.py        |   2 +-
 16 files changed, 188 insertions(+), 301 deletions(-)

diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index d6a58ee..546f388 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -629,15 +629,6 @@ def get_hadoop_downloads():
   # Ranger is always CDP
   cluster_components.append(CdpComponent("ranger",
                                          archive_basename_tmpl="ranger-${version}-admin"))
-
-  # llama-minikdc is used for testing and not something that Impala needs to be built
-  # against. It does not get updated very frequently unlike the other CDH components.
-  # It is stored in a special location compared to other components.
-  toolchain_host = os.environ["IMPALA_TOOLCHAIN_HOST"]
-  download_path_prefix = "https://{0}/build/cdh_components/".format(toolchain_host)
-  destination_basedir = os.environ["CDH_COMPONENTS_HOME"]
-  cluster_components += [EnvVersionedPackage("llama-minikdc", download_path_prefix,
-      destination_basedir)]
   return cluster_components
 
 
@@ -686,7 +677,6 @@ def main():
   If false, most components get the CDH versions based on the $CDH_BUILD_NUMBER.
   The exceptions are:
   - sentry (always downloaded from $IMPALA_TOOLCHAIN_HOST for a given $CDH_BUILD_NUMBER)
-  - llama-minikdc (always downloaded from $TOOLCHAIN_HOST)
   - ranger (always downloaded from $IMPALA_TOOLCHAIN_HOST for a given $CDP_BUILD_NUMBER)
   - kudu (currently always downloaded from $IMPALA_TOOLCHAIN_HOST for a given
     $CDH_BUILD_NUMBER)
diff --git a/bin/create-test-configuration.sh b/bin/create-test-configuration.sh
index d1f5dc8..32d56c3 100755
--- a/bin/create-test-configuration.sh
+++ b/bin/create-test-configuration.sh
@@ -46,7 +46,7 @@ function generate_config {
   perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
       "${GCIN}" > "${GCOUT}.tmp"
 
-  if [ "${IMPALA_KERBERIZE}" = "" ]; then
+  if [[ "${IMPALA_KERBERIZE}" != "true" ]]; then
     sed '/<!-- BEGIN Kerberos/,/END Kerberos settings -->/d' \
         "${GCOUT}.tmp" > "${GCOUT}"
   else
@@ -67,7 +67,6 @@ function generate_config {
 CREATE_METASTORE=0
 CREATE_SENTRY_POLICY_DB=0
 CREATE_RANGER_POLICY_DB=0
-: ${IMPALA_KERBERIZE=}
 
 # parse command line options
 for ARG in $*
@@ -82,15 +81,10 @@ do
     -create_ranger_policy_db)
       CREATE_RANGER_POLICY_DB=1
       ;;
-    -k|-kerberize|-kerberos|-kerb)
-      # This could also come in through the environment...
-      export IMPALA_KERBERIZE=1
-      ;;
     -help|*)
       echo "[-create_metastore] : If true, creates a new metastore."
       echo "[-create_sentry_policy_db] : If true, creates a new sentry policy db."
       echo "[-create_ranger_policy_db] : If true, creates a new Ranger policy db."
-      echo "[-kerberize] : Enable kerberos on the cluster"
       exit 1
       ;;
   esac
@@ -104,7 +98,7 @@ fi
 
 ${CLUSTER_DIR}/admin create_cluster
 
-if [ ! -z "${IMPALA_KERBERIZE}" ]; then
+if [[ "${IMPALA_KERBERIZE}" = "true" ]]; then
   # Sanity check...
   if ! ${CLUSTER_DIR}/admin is_kerberized; then
     echo "Kerberized cluster not created, even though told to."
@@ -192,7 +186,7 @@ for file in core-site.xml hdfs-site.xml yarn-site.xml ; do
   ln -s ${CLUSTER_HADOOP_CONF_DIR}/$file
 done
 
-if ${CLUSTER_DIR}/admin is_kerberized; then
+if [[ "${IMPALA_KERBERIZE}" = "true" ]]; then
   # KERBEROS TODO: Without this, the yarn daemons can see these
   # files, but mapreduce jobs *cannot* see these files.  This seems
   # strange, but making these symlinks also results in data loading
@@ -214,7 +208,7 @@ for SENTRY_VARIANT in oo oo_nogrant no_oo ; do
       sentry-site_${SENTRY_VARIANT}.xml
 done
 
-if [ ! -z "${IMPALA_KERBERIZE}" ]; then
+if [[ "${IMPALA_KERBERIZE}" = "true" ]]; then
   generate_config hbase-jaas-server.conf.template hbase-jaas-server.conf
   generate_config hbase-jaas-client.conf.template hbase-jaas-client.conf
 fi
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 1ad556f..c21088e 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -186,7 +186,6 @@ export CDP_KNOX_VERSION=1.3.0.7.0.2.0-212
 
 export IMPALA_PARQUET_VERSION=1.10.99-cdh6.x-SNAPSHOT
 export IMPALA_AVRO_JAVA_VERSION=1.8.2-cdh6.x-SNAPSHOT
-export IMPALA_LLAMA_MINIKDC_VERSION=1.0.0
 export IMPALA_KITE_VERSION=1.0.0-cdh6.x-SNAPSHOT
 export IMPALA_KUDU_JAVA_VERSION=1.11.0-cdh6.x-SNAPSHOT
 export IMPALA_ORC_JAVA_VERSION=1.6.2
@@ -199,7 +198,8 @@ unset IMPALA_HIVE_URL
 unset IMPALA_KUDU_URL
 unset IMPALA_KUDU_VERSION
 unset IMPALA_SENTRY_URL
-unset IMPALA_LLAMA_MINIKDC_URL
+
+export IMPALA_KERBERIZE=false
 
 # Source the branch and local config override files here to override any
 # variables above or any variables below that allow overriding via environment
@@ -603,7 +603,6 @@ HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${HADOOP_HOME}/share/hadoop/tools/lib/*"
 export MINI_DFS_BASE_DATA_DIR="$IMPALA_HOME/cdh-${CDH_MAJOR_VERSION}-hdfs-data"
 export PATH="$HADOOP_HOME/bin:$PATH"
 
-export MINIKDC_HOME="$CDH_COMPONENTS_HOME/llama-minikdc-${IMPALA_LLAMA_MINIKDC_VERSION}"
 export SENTRY_HOME="$CDH_COMPONENTS_HOME/sentry-${IMPALA_SENTRY_VERSION}"
 export SENTRY_CONF_DIR="$IMPALA_HOME/fe/src/test/resources"
 
@@ -796,7 +795,6 @@ echo "SENTRY_HOME             = $SENTRY_HOME"
 echo "SENTRY_CONF_DIR         = $SENTRY_CONF_DIR"
 echo "RANGER_HOME             = $RANGER_HOME"
 echo "RANGER_CONF_DIR         = $RANGER_CONF_DIR "
-echo "MINIKDC_HOME            = $MINIKDC_HOME"
 echo "THRIFT_HOME             = $THRIFT_HOME"
 echo "HADOOP_LZO              = $HADOOP_LZO"
 echo "IMPALA_LZO              = $IMPALA_LZO"
@@ -834,7 +832,7 @@ if "${CLUSTER_DIR}/admin" is_kerberized; then
   echo " *** This cluster is kerberized ***"
   echo "KRB5_KTNAME            = $KRB5_KTNAME"
   echo "KRB5_CONFIG            = $KRB5_CONFIG"
-  echo "KRB5_TRACE             = $KRB5_TRACE"
+  echo "KRB5_TRACE             = ${KRB5_TRACE:-}"
   echo "HADOOP_OPTS            = $HADOOP_OPTS"
   echo " *** This cluster is kerberized ***"
 else
diff --git a/bin/kerberos/README-kerberos.md b/bin/kerberos/README-kerberos.md
new file mode 100644
index 0000000..7730a55
--- /dev/null
+++ b/bin/kerberos/README-kerberos.md
@@ -0,0 +1,21 @@
+# Kerberized Minicluster Setup
+The kerberized minicluster is enabled by setting IMPALA_KERBERIZE=true in
+impala-config-*.sh.
+
+After setting it you must run ./bin/create-test-configuration.sh then
+restart the minicluster (e.g. with testdata/bin/run-all.sh).
+
+The Kerberized minicluster requires a KDC to be set up and configured
+and service users to be added to the keytab at $KRB5_KTNAME.
+This step is not automated. experimental-kerberos-setup.sh automates
+some of the setup but is experimental at this point.
+
+# Limitations
+Not all minicluster services actually work or are kerberized at this point.
+I was able to run queries against pre-existing HMS tables stored in HDFS.
+
+Kerberos is finicky about hostnames - you may need to tweak your /etc/hosts
+to get hosts to authenticate if kerberos thinks that hostnames are distinct.
+
+Killing minicluster services seems to be broken when using a kerberized
+minicluster. You may need to manually kill the services.
diff --git a/bin/kerberos/experimental-kerberos-setup.sh b/bin/kerberos/experimental-kerberos-setup.sh
new file mode 100755
index 0000000..633eaf7
--- /dev/null
+++ b/bin/kerberos/experimental-kerberos-setup.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# The script automates some of the steps required to set up Kerberos servers and
+# keytabs for an Impala minicluster running on Ubuntu.
+#
+# It installs the kerberos clients and servers, creates principals for the user and
+# services and generates a keytab with all of the required users.
+#
+# This script is tested only on Ubuntu 18.04.
+#
+# References:
+# * https://linuxconfig.org/how-to-install-kerberos-kdc-server-and-client-on-ubuntu-18-04
+set -euo pipefail
+
+# Source impala-config.sh to get config variables, including those set by
+# testdata/bin/minikdc_env.sh.
+DIR=$(dirname "$0")
+. "${DIR}/../impala-config.sh"
+
+if [[ "$IMPALA_KERBERIZE" != "true" ]]; then
+  echo "IMPALA_KERBERIZE must be true, but was: $IMPALA_KERBERIZE"
+  exit 1
+fi
+
+echo "Installing required packages. Sudo password may be required."
+sudo apt install -y krb5-kdc krb5-admin-server krb5-config krb5-user
+
+export KRB5CCNAME=/tmp/krb5cc_${USER}_dev
+
+
+# IN /etc/krb5.conf
+# default_realm = EXAMPLE.COM
+echo "Please modify $KRB5_CONFIG to set default_realm to $MINIKDC_REALM."
+echo "Also add $MINIKDC_REALM to the [realms] section with kdc and admin_server set, e.g."
+echo "
+[realms]
+  EXAMPLE.COM = {
+    kdc = $HOSTNAME
+    admin_server = $HOSTNAME
+  }
+"
+
+read -p "Press enter to continue"
+
+# Create kerberos database for realm if not present.
+if sudo kadmin.local -q "list_principals"; then
+  echo "Using existing realm"
+else
+  echo "Creating new Kerberos database for realm"
+  sudo krb5_newrealm
+fi
+
+echo "Please add or uncomment this line in /etc/krb5kdc/kadm5.acl:"
+echo "  */admin *"
+read -p "Press enter to continue"
+
+sudo service krb5-admin-server restart
+sudo service krb5-kdc restart
+
+# Adds a principal if not present, and adds its key to the keytab.
+# This will prompt for a password.
+add_principal() {
+  local princ=$1
+  if ! sudo kadmin.local -q "get_principal $princ" | grep -F "Principal: $princ"
+  then
+    echo "Principal $princ does not exist, creating"
+    sudo kadmin.local -q "add_principal $princ"
+  fi
+  sudo kadmin.local -q "ktadd -k "$KRB5_KTNAME" $princ"
+}
+
+# Adds a service principal if not present, and adds its key to the keytab.
+# This will generate a random key and not prompt for a password.
+add_service_principal() {
+  local princ=$1
+  if ! sudo kadmin.local -q "get_principal $princ" | grep -F "Principal: $princ"
+  then
+    echo "Principal $princ does not exist, creating"
+    sudo kadmin.local -q "add_principal -randkey $princ"
+  fi
+  sudo kadmin.local -q "ktadd -k "$KRB5_KTNAME" $princ"
+}
+
+# Create an admin user.
+add_principal $USER/admin@$MINIKDC_REALM
+
+# Add service principals.
+for svc in $USER hdfs mapred yarn HTTP hive hbase zookeeper impala impala-be
+do
+  add_service_principal $svc/localhost@$MINIKDC_REALM
+done
+
+# Kinit as the regular user.
+sudo chown $USER $KRB5_KTNAME
+kinit -kt $KRB5_KTNAME $USER/localhost@$MINIKDC_REALM
+
+echo "Keytab contents:"
+klist -kt $KRB5_KTNAME
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index c92142b..a129959 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -97,6 +97,7 @@ be/src/thirdparty/pcg-cpp-0.98/README.md
 lib/python/README.md
 lib/python/impala_py_lib/gdb/README.md
 shell/packaging/README.md
+bin/kerberos/README-kerberos.md
 
 # http://www.apache.org/legal/src-headers.html: "Test data for which the addition of a
 # source header would cause the tests to fail."
diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py
index d1459ad..4f1269a 100755
--- a/bin/start-impala-cluster.py
+++ b/bin/start-impala-cluster.py
@@ -400,6 +400,7 @@ def build_kerberos_args(daemon):
     args.append("-principal={0}".format(os.getenv("MINIKDC_PRINC_IMPALA_BE")))
   if os.getenv("MINIKDC_DEBUG", "") == "true":
     args.append("-krb5_debug_file=/tmp/{0}.krb5_debug".format(daemon))
+  return args
 
 
 def compute_impalad_mem_limit(cluster_size):
diff --git a/buildall.sh b/buildall.sh
index ab7d415..08ff4bf 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -58,7 +58,6 @@ FORMAT_SENTRY_POLICY_DB=0
 FORMAT_RANGER_POLICY_DB=0
 NEED_MINICLUSTER=0
 START_IMPALA_CLUSTER=0
-IMPALA_KERBERIZE=0
 SNAPSHOT_FILE=
 METASTORE_SNAPSHOT_FILE=
 CODE_COVERAGE=0
@@ -178,13 +177,6 @@ do
     -start_impala_cluster)
       START_IMPALA_CLUSTER=1
       ;;
-    -k|-kerberize|-kerberos|-kerb)
-      # Export to the environment for all child process tools
-      export IMPALA_KERBERIZE=1
-      set +u
-      . ${MINIKDC_ENV}
-      set -u
-      ;;
     -v|-debug)
       echo "Running in Debug mode"
       set -x
@@ -236,7 +228,6 @@ do
       echo "[-snapshot_file <file name>] : Load test data from a snapshot file"
       echo "[-metastore_snapshot_file <file_name>]: Load the hive metastore snapshot"
       echo "[-so|-build_shared_libs] : Dynamically link executables (default is static)"
-      echo "[-kerberize] : Enable kerberos on the cluster"
       echo "[-fe_only] : Build just the frontend"
       echo "[-ninja] : Use ninja instead of make"
       echo "[-cmake_only] : Generate makefiles only, instead of doing a full build"
@@ -318,11 +309,13 @@ fi
 
 # If we aren't kerberized then we certainly don't need to talk about
 # re-sourcing impala-config.
-if [[ ${IMPALA_KERBERIZE} -eq 0 ]]; then
+if [[ ${IMPALA_KERBERIZE} = "true" ]]; then
+  . ${MINIKDC_ENV}
+else
   NEEDS_RE_SOURCE_NOTE=0
 fi
 
-if [[ ${IMPALA_KERBERIZE} -eq 1 &&
+if [[ ${IMPALA_KERBERIZE} = "true" &&
   (${TESTDATA_ACTION} -eq 1 || ${TESTS_ACTION} -eq 1) ]]; then
   echo "Running tests or loading test data is not supported for kerberized clusters."
   echo "Please remove the -testdata flag and/or add the -skiptests flag."
@@ -464,11 +457,6 @@ reconfigure_test_cluster() {
     "${IMPALA_HOME}/testdata/bin/kill-all.sh" || true
   fi
 
-  # Stop the minikdc if needed.
-  if "${CLUSTER_DIR}/admin" is_kerberized; then
-      "${IMPALA_HOME}/testdata/bin/minikdc.sh" stop
-  fi
-
   local CREATE_TEST_CONFIG_ARGS=""
   if [[ "$FORMAT_SENTRY_POLICY_DB" -eq 1 ]]; then
     CREATE_TEST_CONFIG_ARGS+=" -create_sentry_policy_db"
diff --git a/fe/src/test/resources/hive-site.xml.py b/fe/src/test/resources/hive-site.xml.py
index 4479d61..1f891fe 100644
--- a/fe/src/test/resources/hive-site.xml.py
+++ b/fe/src/test/resources/hive-site.xml.py
@@ -20,7 +20,7 @@
 import os
 
 hive_major_version = int(os.environ['IMPALA_HIVE_VERSION'][0])
-kerberize = os.environ.get('IMPALA_KERBERIZE') == '1'
+kerberize = os.environ.get('IMPALA_KERBERIZE') == 'true'
 variant = os.environ.get('HIVE_VARIANT')
 
 CONFIG = {
diff --git a/testdata/bin/minikdc.sh b/testdata/bin/minikdc.sh
deleted file mode 100755
index a3027cb..0000000
--- a/testdata/bin/minikdc.sh
+++ /dev/null
@@ -1,218 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-#
-# Start, stop, and related actions upon a "MiniKdc".  This behaves as
-# a normal KDC, but is self-contained and we control the principals.
-#
-
-# Exits with failure, printing its arguments
-die() {
-    echo "$0 ERROR: $@"
-    exit 1
-}
-
-# Initial sanity checks
-initial_checks() {
-    if [ "${MINIKDC_HOME}" = "" ]; then
-        die "MINIKDC_HOME environment variable not set"
-    fi
-
-    if [ ! -x "${MINIKDC_BIN}" ]; then
-        die "${MINIKDC_BIN} absent or not executable"
-    fi
-}
-
-# Create a one-shot directory to hold minikdc files.  Cleaned up in cleanup().
-make_working_dir() {
-    if [ ! -d ${MINIKDC_SCRATCH_ROOT} ]; then
-        mkdir -p ${MINIKDC_SCRATCH_ROOT}
-    fi
-    MINIKDC_TMP=`mktemp -d --tmpdir=${MINIKDC_SCRATCH_ROOT} minikdc.XXXXXXXXXX`
-    if [ ! -d ${MINIKDC_TMP} ]; then
-        die "Failure creating working directory"
-    fi
-    ln -s `basename ${MINIKDC_TMP}` ${MINIKDC_WD}
-}
-
-# Remove directory above.  Called as a part of do_start() so the working
-# directory will exist until the next instance of the minikdc is started.
-cleanup() {
-    rm -f ${MINIKDC_WD}
-    rm -rf ${MINIKDC_SCRATCH_ROOT}/minikdc.*
-}
-
-# Echos the minikdc working directory
-get_working_dir() {
-    if [ -d "${MINIKDC_WD}" ]; then
-        echo "${MINIKDC_WD}"
-        return 0
-    else
-        return 1
-    fi
-}
-
-# Is the minikdc running?
-minikdc_running() {
-    [ -d "${MINIKDC_WD}" ] || return 1
-    pgrep -f llama-minikdc > /dev/null 2>&1
-    return $?
-}
-
-# The MiniKdc reads from this properties file.
-write_properties() {
-    cat > ${MINIKDC_PROPS} <<EOF
-org.name = ${MINIKDC_ORG}
-org.domain = ${MINIKDC_DOMAIN}
-kdc.port = ${MINIKDC_PORT}
-debug = ${MINIKDC_DEBUG}
-EOF
-}
-
-# Calls the "real" minikdc startup shell script from llama-minikdc; uses
-# nohup to detach it from the terminal.
-start_minikdc() {
-    # Grabs all the principals and mutilates them into one desired string
-    MINIKDC_PRINCIPALS=`env | grep MINIKDC_PRINC_ \
-        | sed "s/@${MINIKDC_REALM}//g" \
-        | awk -F= '{print $2}' \
-        | tr '\n' ' '`
-
-    if [ "${MINIKDC_DEBUG}" = "true" ]; then
-        export MINIKDC_OPTS=-Dsun.security.krb5.debug=true
-    fi
-
-    nohup ${MINIKDC_BIN} ${MINIKDC_WD} ${MINIKDC_PROPS} ${MINIKDC_KEYTAB} \
-        ${MINIKDC_PRINCIPALS} > ${MINIKDC_LOG} 2>&1 &
-    sleep 1
-    # Ought to be running instantaneously
-    if ! minikdc_running; then
-        die "MiniKdc failed to start"
-    fi
-
-    # But it takes a little while to become available
-    TRIES=15
-    STARTUPDONE=0
-    while [ ${TRIES} -gt 0 ]; do
-        if grep -q "^Standalone MiniKdc Running" ${MINIKDC_LOG}; then
-            STARTUPDONE=1
-            break
-        fi
-        printf .
-        sleep 1
-        TRIES=`expr ${TRIES} - 1`
-    done
-
-    if [ ${STARTUPDONE} -eq 0 ]; then
-        do_stop
-        die "MiniKdc failed to become available"
-    fi
-}
-
-# Hunt down and destroy the minikdc.  Gently at first, then aggressively.
-kill_minikdc() {
-    TRIES=3
-    DEAD=0
-    while [ ${TRIES} -gt 0 ]; do
-        if minikdc_running; then
-            pkill -f llama-minikdc
-            sleep 1
-        else
-            DEAD=1
-            break;
-        fi
-        TRIES=`expr ${TRIES} - 1`
-    done
-
-    if [ ${DEAD} -eq 0 ]; then
-        pkill -9 -f llama-minikdc
-        sleep 1
-        if minikdc_running; then
-            die "Failed to kill the minikdc"
-        fi
-    fi
-}
-
-# Controlling function for 'start' command.
-do_start() {
-    if minikdc_running; then
-        echo "The minikdc is already running."
-        exit 0
-    fi
-
-    cleanup
-    make_working_dir
-    write_properties
-    start_minikdc
-    echo "Minikdc started successfully."
-    return 0
-}
-
-# Controlling function for 'stop' command.
-do_stop() {
-    if ! minikdc_running; then
-        echo "The minikdc is not running."
-        return 0
-    fi
-
-    kill_minikdc
-    echo "Minikdc stopped successfully."
-}
-
-#
-# Execution starts here.
-#
-
-if [ ! -f "${MINIKDC_ENV}" ]; then
-    die "Can't find MINIKDC_ENV: ${MINIKDC_ENV}"
-fi
-. ${MINIKDC_ENV}
-
-# Interesting MiniKdc configuration:
-MINIKDC_BIN=${MINIKDC_HOME}/bin/minikdc
-MINIKDC_PROPS=${MINIKDC_WD}/properties.conf
-MINIKDC_LOG=${MINIKDC_WD}/minikdc.log
-MINIKDC_PORT=42574
-
-initial_checks
-
-case "$1" in
-    start)
-        do_start
-        RV=$?
-        ;;
-    stop)
-        do_stop
-        RV=$?
-        ;;
-    restart)
-        do_stop && do_start
-        RV=$?
-        ;;
-    running|status)
-        minikdc_running
-        RV=$?
-        ;;
-    *)
-        die "Usage: start|stop|restart|running|status"
-        ;;
-esac
-
-exit ${RV}
diff --git a/testdata/bin/minikdc_env.sh b/testdata/bin/minikdc_env.sh
index 614636d..ecd902f 100644
--- a/testdata/bin/minikdc_env.sh
+++ b/testdata/bin/minikdc_env.sh
@@ -15,14 +15,18 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-# This file should be sourced by the minikdc and by impala-config.sh
-# when operating a kerberized development environment.
+# This file should be sourced by impala-config.sh when operating in a
+# kerberized development environment.
 #
+# The minikdc name comes from an old KDC implementation that was used as a standalone
+# KDC for Impala minicluster environments. That minikdc support bitrotted so now this
+# only supports a manually configured KDC (e.g. set up by
+# bin/kerberos/experimental-kerberos-setup.sh).
 
 # Twiddle this to turn on/off kerberos debug EVERYWHERE.  Then restart
-# all deamons and the minikdc to enable lots of kerberos debug messages.
+# all daemons to enable lots of kerberos debug messages.
 # Valid values are true | false
-export MINIKDC_DEBUG=false
+export MINIKDC_DEBUG=true
 
 # MiniKdc realm configuration.  Unfortunately, it breaks if realm isn't
 # EXAMPLE.COM.
@@ -51,17 +55,17 @@ export MINIKDC_PRINC_ZOOK=zookeeper/localhost@${MINIKDC_REALM}
 export MINIKDC_PRINC_IMPALA=impala/localhost@${MINIKDC_REALM}
 export MINIKDC_PRINC_IMPALA_BE=impala-be/localhost@${MINIKDC_REALM}
 export MINIKDC_PRINC_USER=${USER}/localhost@${MINIKDC_REALM}
-export MINIKDC_PRINC_LLAM=llama/localhost@${MINIKDC_REALM}
 
 # Basic directory setup:
 MINIKDC_SCRATCH_ROOT=${MINIKDC_SCRATCH_ROOT-${IMPALA_CLUSTER_LOGS_DIR}}
 export MINIKDC_WD=${MINIKDC_SCRATCH_ROOT}/minikdc-workdir
 
-# The one big keytab created by the minikdc
-export MINIKDC_KEYTAB=${MINIKDC_WD}/keytab
+# The one big keytab that should contain all the service users
+export MINIKDC_KEYTAB=$IMPALA_HOME/impala.keytab
 
-# The krb5.conf file everyone needs to point at
-export MINIKDC_KRB5CONF=${MINIKDC_WD}/krb5.conf
+# The krb5.conf file that all the services should be using. We just point
+# to the system config file for now.
+export MINIKDC_KRB5CONF=/etc/krb5.conf
 
 # These options tell kerberos related code to emit lots of debug messages
 if [ ${MINIKDC_DEBUG} = "true" ]; then
@@ -72,7 +76,8 @@ else
     export JAVA_KERBEROS_MAGIC=""
 fi
 
-# Kerberos environment variables to talk to our MiniKdc
+# Kerberos environment variables so other kerberos clients will use our
+# kerberos setup.
 export KRB5_KTNAME="${MINIKDC_KEYTAB}"
 export KRB5_CONFIG="${MINIKDC_KRB5CONF}"
 
diff --git a/testdata/bin/run-all.sh b/testdata/bin/run-all.sh
index 3911e38..8bb666a 100755
--- a/testdata/bin/run-all.sh
+++ b/testdata/bin/run-all.sh
@@ -59,12 +59,19 @@ $IMPALA_HOME/testdata/bin/run-mini-dfs.sh ${HDFS_FORMAT_CLUSTER-} 2>&1 | \
 # - One Yarn ResourceManager
 # - Multiple Yarn NodeManagers, exactly one per HDFS DN
 if [[ ${DEFAULT_FS} == "hdfs://${INTERNAL_LISTEN_HOST}:20500" ]]; then
-  echo " --> Starting HBase"
-  $IMPALA_HOME/testdata/bin/run-hbase.sh 2>&1 | \
-      tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hbase.log
+  # HBase does not work with kerberos yet.
+  if [[ "$IMPALA_KERBERIZE" != true ]]; then
+    echo " --> Starting HBase"
+    $IMPALA_HOME/testdata/bin/run-hbase.sh 2>&1 | \
+        tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hbase.log
+  fi
 
   echo " --> Starting Hive Server and Metastore Service"
-  $IMPALA_HOME/testdata/bin/run-hive-server.sh 2>&1 | \
+  HIVE_FLAGS=
+  if [[ "$IMPALA_KERBERIZE" = true ]]; then
+    HIVE_FLAGS=" -only_metastore"
+  fi
+  $IMPALA_HOME/testdata/bin/run-hive-server.sh $HIVE_FLAGS 2>&1 | \
       tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hive-server.log
 
   echo " --> Starting the Sentry Policy Server"
@@ -97,4 +104,4 @@ fi
 
 echo " --> Starting Ranger Server"
 "${IMPALA_HOME}/testdata/bin/run-ranger-server.sh" 2>&1 | \
-    tee "${IMPALA_CLUSTER_LOGS_DIR}/run-ranger-server.log"
\ No newline at end of file
+    tee "${IMPALA_CLUSTER_LOGS_DIR}/run-ranger-server.log"
diff --git a/testdata/bin/run-hbase.sh b/testdata/bin/run-hbase.sh
index e7d67c9..39b369e 100755
--- a/testdata/bin/run-hbase.sh
+++ b/testdata/bin/run-hbase.sh
@@ -78,7 +78,7 @@ if ${CLUSTER_DIR}/admin is_kerberized; then
 
   # These ultimately become args to java when it starts up hbase
   K1="-Djava.security.krb5.conf=${KRB5_CONFIG}"
-  K2="${JAVA_KRB5_DEBUG}"
+  K2="${JAVA_KRB5_DEBUG:-}"
   K3="-Djava.security.auth.login.config=${HBASE_JAAS_CLIENT}"
   K4="-Djava.security.auth.login.config=${HBASE_JAAS_SERVER}"
 
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index 8efa387..708599a 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -31,7 +31,6 @@ set -euo pipefail
 . $IMPALA_HOME/bin/report_build_error.sh
 setup_report_build_error
 
-: ${IMPALA_KERBERIZE=}
 : ${INCLUDE_YARN=}
 
 # For Hive 3, we require Yarn for Tez support.
@@ -39,10 +38,9 @@ if "$USE_CDP_HIVE"; then
   INCLUDE_YARN=1
 fi
 
-while getopts vky OPT; do
+while getopts vy OPT; do
   case $OPT in
     v) set -x;;
-    k) export IMPALA_KERBERIZE=1;;
     y) export INCLUDE_YARN=1;;
     ?) echo "Usage: $0 [-v (verbose) -k (kerberize) -y (yarn)] ACTION (see source...)"; exit 1;;
   esac
@@ -109,7 +107,6 @@ EMPTY_NODE_DIRS=$(echo data/dfs/{dn,nn} var/{run,lib/hadoop-hdfs,log} \
     var/{log,run}/kudu/{master,ts} var/lib/kudu/{master,ts}/{wal,data})
 
 EASY_ACCESS_LOG_DIR="$IMPALA_CLUSTER_LOGS_DIR"
-MINIKDC_INIT=${IMPALA_HOME}/testdata/bin/minikdc.sh
 
 FIND_EXECUTABLE_FILTER="-executable"
 if ! find /dev/null "${FIND_EXECUTABLE_FILTER}" 2> /dev/null; then
@@ -127,12 +124,8 @@ fi
 # ourselves a fresh TGT (Ticket-Granting-Ticket).
 #
 function kerberize_setup {
-  if is_kerberized; then
-    export IMPALA_KERBERIZE=1
-  fi
-
   # No kerberos?  We're done.
-  if [ -z "${IMPALA_KERBERIZE}" ]; then
+  if [[ "${IMPALA_KERBERIZE}" != "true" ]]; then
     return
   fi
 
@@ -170,9 +163,6 @@ function kerberize_setup {
     exit 1
   fi
 
-  # Starting it has no effect if it's already started...
-  ${MINIKDC_INIT} start
-
   # Source the appropriate minikdc environment variables
   . ${MINIKDC_ENV}
 
@@ -190,7 +180,7 @@ function kerberize_setup {
 function is_kerberized {
   HCONFSC="`get_hadoop_client_conf_dir`/core-site.xml"
   if [ -f "${HCONFSC}" ]; then
-    if grep -qi "Kerberos settings" "${HCONFSC}"; then
+    if grep -qi "kerberos" "${HCONFSC}"; then
       # If the config exists and has kerberos things in it, treat as kerberized
       return 0
     fi
@@ -216,13 +206,9 @@ function create_cluster {
   # Blow away existing config files (so we don't pick up kerberos settings)
   rm -f `get_hadoop_client_conf_dir`/*
 
-  if [ ! -z "${IMPALA_KERBERIZE}" ]; then
+  if [[ "${IMPALA_KERBERIZE}" = "true" ]]; then
     kerberize_setup
     echo "Creating Kerberized cluster."
-  else
-    # Stop the minikdc in case it was running
-    . ${MINIKDC_ENV}
-    ${MINIKDC_INIT} stop
   fi
 
   echo "Hostname for internal communication: ${INTERNAL_LISTEN_HOST}" \
@@ -295,7 +281,7 @@ function create_cluster {
 
       # Chop out everything between the BEGIN/END Kerberos comments if
       # not kerberized
-      if [ -z "${IMPALA_KERBERIZE}" ]; then
+      if [[ "${IMPALA_KERBERIZE}" != "true" ]]; then
         sed '/<!-- BEGIN Kerberos/,/END Kerberos settings -->/d' \
             "$ACTUAL_PATH.1" > "$ACTUAL_PATH"
       else
@@ -329,11 +315,10 @@ function start_cluster {
     return 1
   fi
 
-  if [ ! -z "${IMPALA_KERBERIZE}" ] && ! is_kerberized; then
+  if [[ "${IMPALA_KERBERIZE}" = "true" ]] && ! is_kerberized; then
     echo "Kerberized start requested, but the config files aren't set up"
-    echo "for kerberos.  Destroy the cluster and rebuild it:"
-    echo "  --> $ ./testdata/cluster/admin delete_cluster"
-    echo "  --> $ IMPALA_KERBERIZE=1 ./testdata/cluster/admin create_cluster"
+    echo "for kerberos. You must regenerate configurations with "
+    echo  "  ./bin/create-test-configuration.sh"
     exit 1
   fi
 
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
index 6af28f4..3a5faa3 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
@@ -20,7 +20,7 @@
 import os
 import sys
 
-kerberize = os.environ.get('IMPALA_KERBERIZE') == '1'
+kerberize = os.environ.get('IMPALA_KERBERIZE') == 'true'
 target_filesystem = os.environ.get('TARGET_FILESYSTEM')
 
 compression_codecs = [
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
index 305feb3..769685f 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
@@ -20,7 +20,7 @@
 import os
 import sys
 
-kerberize = os.environ.get('IMPALA_KERBERIZE') == '1'
+kerberize = os.environ.get('IMPALA_KERBERIZE') == 'true'
 hive_major_version = int(os.environ['IMPALA_HIVE_VERSION'][0])