You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/02/05 12:23:30 UTC
[impala] 01/02: IMPALA-10389: impala-profile-tool container
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 93d4348b543a74248af9a64757fffe3d9a158648
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Mon Feb 1 09:31:37 2021 -0800
IMPALA-10389: impala-profile-tool container
Add a build step for an impala-profile-tool docker image
that makes it easy to run the binary on any system.
This container is automatically built as part of the
docker build.
This sets up a new build context that doesn't pull in all of
the same dependencies or depend on the Java build.
Testing:
cat logs/cluster/profiles/* | \
docker run -i impala_profile_tool
I uploaded a build of the container to dockerhub too:
timgarmstrong/impala_profile_tool
Change-Id: I36915cd686ab930dcc934bc0c81bff8c16d46714
Reviewed-on: http://gerrit.cloudera.org:8080/17015
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
docker/CMakeLists.txt | 52 +++++++++++++---
docker/impala_profile_tool/Dockerfile | 57 ++++++++++++++++++
docker/setup_build_context.py | 110 +++++++++++++++++++++-------------
docker/utility_entrypoint.sh | 36 +++++++++++
4 files changed, 206 insertions(+), 49 deletions(-)
diff --git a/docker/CMakeLists.txt b/docker/CMakeLists.txt
index 7fe085b..23efee8 100644
--- a/docker/CMakeLists.txt
+++ b/docker/CMakeLists.txt
@@ -19,6 +19,10 @@ set(IMPALA_BASE_BUILD_CONTEXT_DIR
${CMAKE_SOURCE_DIR}/docker/build_context
)
+set(IMPALA_UTILITY_BUILD_CONTEXT_DIR
+ ${CMAKE_SOURCE_DIR}/docker/build_context_utility
+)
+
set(DOCKER_BUILD ${CMAKE_SOURCE_DIR}/docker/docker-build.sh)
find_program(LSB_RELEASE_EXEC lsb_release)
@@ -79,7 +83,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
add_custom_target(docker_images)
add_custom_target(docker_debug_images)
- set(daemon_image_names "")
+ set(exported_image_names "")
# Add a target with name 'target' to build a daemon image for the daemon with
# name 'daemon_name', e.g. "impalad_executor". The image is tagged as 'image_name'.
@@ -98,7 +102,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
COMMENT "Building ${image_name} docker image."
VERBATIM
)
- set(daemon_image_names "${daemon_image_names} ${image_name}" PARENT_SCOPE)
+ set(exported_image_names "${exported_image_names} ${image_name}" PARENT_SCOPE)
endfunction()
# Add debug and release docker image targets for the given daemon e.g. if called
@@ -113,7 +117,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
add_daemon_docker_image(${debug_target} ${daemon_name} ${debug_image} debug)
ADD_DEPENDENCIES(docker_images ${release_target})
ADD_DEPENDENCIES(docker_debug_images ${debug_target})
- set(daemon_image_names "${daemon_image_names} ${release_image}" PARENT_SCOPE)
+ set(exported_image_names "${exported_image_names} ${release_image}" PARENT_SCOPE)
endfunction()
# Stamp out image targets for all of the Impala daemons.
@@ -124,9 +128,6 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
add_daemon_docker_images(statestored)
add_daemon_docker_images(admissiond)
- # Generate a text file with all of the release daemon images.
- file(WRITE ${CMAKE_SOURCE_DIR}/docker/docker-images.txt "${daemon_image_names}")
-
# HMS quickstart image, which requires Hive and Hadoop builds.
set(quickstart_hms_build_dir ${CMAKE_SOURCE_DIR}/docker/quickstart_hms)
add_custom_target(quickstart_hms_build_setup
@@ -155,5 +156,42 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
COMMENT "Building quickstart client docker image."
VERBATIM
)
-endif()
+  # Add target 'profile_tool_target' that builds utility image 'profile_tool_image' for
+  # 'build_type'. 'build_context_args' are passed to the setup_build_context.py script.
+  function(add_utility_images profile_tool_target profile_tool_image build_type build_context_args)
+    # The utility build context only needs the impala-profile-tool binary. Sending the whole
+    # impala workspace including test binaries, testdata, etc to the docker daemon can be
+    # very expensive, so we create a build context with symlinks.
+    add_custom_target(impala_utility_build_context_${build_type}
+      COMMAND ${CMAKE_SOURCE_DIR}/docker/setup_build_context.py ${build_context_args} --utility-context
+      DEPENDS impala-profile-tool ${CMAKE_SOURCE_DIR}/docker/setup_build_context.py
+      COMMENT "Creating impala utility build context build_type=${build_type}."
+      VERBATIM
+    )
+    # Target for the Impala profile tool image.
+    add_custom_target(${profile_tool_target}
+      # Run docker build inside the build context directory so that all dependencies are
+      # sent to the docker daemon. This allows the Dockerfile build to copy all necessary
+      # dependencies.
+      COMMAND tar cvh . -C ${CMAKE_SOURCE_DIR}/docker/impala_profile_tool/ . |
+              ${DOCKER_BUILD} -t ${profile_tool_image}
+                  --build-arg BASE_IMAGE=${DISTRO_BASE_IMAGE} -
+      WORKING_DIRECTORY ${IMPALA_UTILITY_BUILD_CONTEXT_DIR}/${build_type}
+      DEPENDS impala_utility_build_context_${build_type} ${CMAKE_SOURCE_DIR}/docker/impala_profile_tool/Dockerfile
+      DEPENDS ${CMAKE_SOURCE_DIR}/docker/utility_entrypoint.sh
+      COMMENT "Building Impala profile tool docker image build_type=${build_type}."
+      VERBATIM
+    )
+    # Export the docker image name (not the CMake target name), as the daemon images do.
+    set(exported_image_names "${exported_image_names} ${profile_tool_image}" PARENT_SCOPE)
+  endfunction()
+ add_utility_images(impala_profile_tool_image impala_profile_tool release "")
+ add_utility_images(impala_profile_tool_debug_image impala_profile_tool_debug debug "--debug-build")
+
+ ADD_DEPENDENCIES(docker_images impala_profile_tool_image)
+ ADD_DEPENDENCIES(docker_debug_images impala_profile_tool_debug_image)
+
+ # Generate a text file with the names of all of the exported docker images.
+ file(WRITE ${CMAKE_SOURCE_DIR}/docker/docker-images.txt "${exported_image_names}")
+endif()
diff --git a/docker/impala_profile_tool/Dockerfile b/docker/impala_profile_tool/Dockerfile
new file mode 100644
index 0000000..3a55b85
--- /dev/null
+++ b/docker/impala_profile_tool/Dockerfile
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ARG BASE_IMAGE=ubuntu:16.04
+FROM ${BASE_IMAGE}
+
+# Install dependencies required for Impala utility binaries to run, plus
+# some useful utilities.
+# TODO: ideally we wouldn't depend on the JVM libraries, but currently the JNI code
+# in be/ is not cleanly separated from the code that doesn't use JNI.
+RUN apt-get update && \
+ apt-get install -y openjdk-8-jre-headless \
+ libsasl2-2 libsasl2-modules libsasl2-modules-gssapi-mit \
+ sudo netcat-openbsd less curl iproute2 vim iputils-ping \
+ krb5-user && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+# Use a non-privileged impala user to run the processes in the container.
+# That user should own everything in the /opt/impala subdirectory.
+RUN groupadd -r impala -g 1000 && useradd --no-log-init -r -u 1000 -g 1000 impala && \
+ mkdir -p /opt/impala && chown impala /opt/impala && \
+ chmod ugo+w /etc/passwd
+USER impala
+
+# Copy build artifacts required for the utilities.
+# Need to have multiple copy commands to preserve directory structure.
+COPY --chown=impala bin /opt/impala/bin
+COPY --chown=impala lib /opt/impala/lib
+
+WORKDIR /opt/impala/
+
+ENTRYPOINT ["/opt/impala/bin/utility_entrypoint.sh", "/opt/impala/bin/impala-profile-tool",\
+ "-logtostderr"]
+
+LABEL name="Apache Impala Profile Tool" \
+ description="Tool for working with Impala profiles." \
+ # Common labels. NOTE(review): this file declares no ARG for the variables below - confirm they are set.
+ org.label-schema.maintainer=$MAINTAINER \
+ org.label-schema.url=$URL \
+ org.label-schema.vcs-ref=$VCS_REF \
+ org.label-schema.vcs-type=$VCS_TYPE \
+ org.label-schema.vcs-url=$VCS_URL \
+ org.label-schema.version=$VERSION
diff --git a/docker/setup_build_context.py b/docker/setup_build_context.py
index c1164e3..037ed65 100755
--- a/docker/setup_build_context.py
+++ b/docker/setup_build_context.py
@@ -29,6 +29,9 @@ from subprocess import check_call
parser = argparse.ArgumentParser()
parser.add_argument("--debug-build", help="Setup build context for debug build",
action="store_true")
+parser.add_argument("--utility-context",
+ help="Setup utility build context instead of daemon",
+ action="store_true")
args = parser.parse_args()
IMPALA_HOME = os.environ["IMPALA_HOME"]
@@ -36,7 +39,10 @@ if args.debug_build:
BUILD_TYPE = "debug"
else:
BUILD_TYPE = "release"
-OUTPUT_DIR = os.path.join(IMPALA_HOME, "docker/build_context", BUILD_TYPE)
+if args.utility_context:
+ OUTPUT_DIR = os.path.join(IMPALA_HOME, "docker/build_context_utility", BUILD_TYPE)
+else:
+ OUTPUT_DIR = os.path.join(IMPALA_HOME, "docker/build_context", BUILD_TYPE)
IMPALA_TOOLCHAIN_PACKAGES_HOME = os.environ["IMPALA_TOOLCHAIN_PACKAGES_HOME"]
IMPALA_GCC_VERSION = os.environ["IMPALA_GCC_VERSION"]
@@ -66,10 +72,16 @@ LIB_DIR = os.path.join(OUTPUT_DIR, "lib")
# The statestore does not require any jar files since it does not run an embedded JVM.
STATESTORE_LIB_DIR = os.path.join(OUTPUT_DIR, "statestore-lib")
+# We generate multiple library directories for the build context for daemons,
+# but only a single one for the utility build context.
+if args.utility_context:
+ TARGET_LIB_DIRS = [LIB_DIR]
+else:
+ TARGET_LIB_DIRS = [LIB_DIR, EXEC_LIB_DIR, STATESTORE_LIB_DIR]
+
os.mkdir(BIN_DIR)
-os.mkdir(EXEC_LIB_DIR)
-os.mkdir(LIB_DIR)
-os.mkdir(STATESTORE_LIB_DIR)
+for lib_dir in TARGET_LIB_DIRS:
+ os.mkdir(lib_dir)
def symlink_file_into_dir(src_file, dst_dir):
@@ -92,20 +104,29 @@ def strip_debug_symbols(src_file, dst_dirs):
# Impala binaries and native dependencies.
+
# Strip debug symbols from release build to reduce image size. Keep them for
# debug build.
-IMPALAD_BINARY = os.path.join(IMPALA_HOME, "be/build", BUILD_TYPE, "service/impalad")
-if args.debug_build:
- symlink_file_into_dir(IMPALAD_BINARY, BIN_DIR)
+if args.utility_context:
+ PROFILE_TOOL_BINARY = os.path.join(
+ IMPALA_HOME, "be/build", BUILD_TYPE, "util/impala-profile-tool")
+ if args.debug_build:
+ symlink_file_into_dir(PROFILE_TOOL_BINARY, BIN_DIR)
+ else:
+ strip_debug_symbols(PROFILE_TOOL_BINARY, [BIN_DIR])
else:
- strip_debug_symbols(IMPALAD_BINARY, [BIN_DIR])
+ IMPALAD_BINARY = os.path.join(IMPALA_HOME, "be/build", BUILD_TYPE, "service/impalad")
+ if args.debug_build:
+ symlink_file_into_dir(IMPALAD_BINARY, BIN_DIR)
+ else:
+ strip_debug_symbols(IMPALAD_BINARY, [BIN_DIR])
# Add libstc++ binaries to LIB_DIR. Strip debug symbols for release builds.
for libstdcpp_so in glob.glob(os.path.join(
GCC_HOME, "lib64/{0}*.so*".format("libstdc++"))):
# Ignore 'libstdc++.so.*-gdb.py'.
if not os.path.basename(libstdcpp_so).endswith(".py"):
- dst_dirs = [LIB_DIR, EXEC_LIB_DIR, STATESTORE_LIB_DIR]
+ dst_dirs = TARGET_LIB_DIRS
if args.debug_build:
symlink_file_into_dirs(libstdcpp_so, dst_dirs)
else:
@@ -113,44 +134,49 @@ for libstdcpp_so in glob.glob(os.path.join(
# Add libgcc binaries to LIB_DIR.
for libgcc_so in glob.glob(os.path.join(GCC_HOME, "lib64/{0}*.so*".format("libgcc_s"))):
- symlink_file_into_dirs(libgcc_so, [LIB_DIR, EXEC_LIB_DIR, STATESTORE_LIB_DIR])
+ symlink_file_into_dirs(libgcc_so, TARGET_LIB_DIRS)
# Add libkudu_client binaries to LIB_DIR. Strip debug symbols for release builds.
for kudu_client_so in glob.glob(os.path.join(KUDU_LIB_DIR, "libkudu_client.so*")):
- # For some reason, statestored requires libkudu_client.so.
- dst_dirs = [LIB_DIR, EXEC_LIB_DIR, STATESTORE_LIB_DIR]
+ # All backend binaries currently link against libkudu_client.so even if they don't need
+ # it.
+ dst_dirs = TARGET_LIB_DIRS
if args.debug_build:
symlink_file_into_dirs(kudu_client_so, dst_dirs)
else:
strip_debug_symbols(kudu_client_so, dst_dirs)
-# Impala Coordinator dependencies.
-dep_classpath = file(os.path.join(IMPALA_HOME, "fe/target/build-classpath.txt")).read()
-for jar in dep_classpath.split(":"):
- assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
- symlink_file_into_dir(jar, LIB_DIR)
-
-# Impala Coordinator jars.
-num_frontend_jars = 0
-for jar in glob.glob(os.path.join(IMPALA_HOME, "fe/target/impala-frontend-*.jar")):
- # Ignore the tests jar
- if jar.find("-tests") != -1:
- continue
- symlink_file_into_dir(jar, LIB_DIR)
- num_frontend_jars += 1
-# There must be exactly one impala-frontend jar.
-assert num_frontend_jars == 1
-
-# Impala Executor dependencies.
-dep_classpath = file(os.path.join(IMPALA_HOME,
- "java/executor-deps/target/build-executor-deps-classpath.txt")).read()
-for jar in dep_classpath.split(":"):
- assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
- symlink_file_into_dir(jar, EXEC_LIB_DIR)
-
-# Templates for debug web pages.
-os.symlink(os.path.join(IMPALA_HOME, "www"), os.path.join(OUTPUT_DIR, "www"))
-# Scripts
-symlink_file_into_dir(os.path.join(IMPALA_HOME, "docker/daemon_entrypoint.sh"), BIN_DIR)
-symlink_file_into_dir(os.path.join(IMPALA_HOME, "bin/graceful_shutdown_backends.sh"),
- BIN_DIR)
+if args.utility_context:
+ symlink_file_into_dir(
+ os.path.join(IMPALA_HOME, "docker/utility_entrypoint.sh"), BIN_DIR)
+else:
+ # Impala Coordinator dependencies.
+ dep_classpath = file(os.path.join(IMPALA_HOME, "fe/target/build-classpath.txt")).read()
+ for jar in dep_classpath.split(":"):
+ assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
+ symlink_file_into_dir(jar, LIB_DIR)
+
+ # Impala Coordinator jars.
+ num_frontend_jars = 0
+ for jar in glob.glob(os.path.join(IMPALA_HOME, "fe/target/impala-frontend-*.jar")):
+ # Ignore the tests jar
+ if jar.find("-tests") != -1:
+ continue
+ symlink_file_into_dir(jar, LIB_DIR)
+ num_frontend_jars += 1
+ # There must be exactly one impala-frontend jar.
+ assert num_frontend_jars == 1
+
+ # Impala Executor dependencies.
+ dep_classpath = file(os.path.join(IMPALA_HOME,
+ "java/executor-deps/target/build-executor-deps-classpath.txt")).read()
+ for jar in dep_classpath.split(":"):
+ assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
+ symlink_file_into_dir(jar, EXEC_LIB_DIR)
+
+ # Templates for debug web pages.
+ os.symlink(os.path.join(IMPALA_HOME, "www"), os.path.join(OUTPUT_DIR, "www"))
+ # Scripts
+ symlink_file_into_dir(os.path.join(IMPALA_HOME, "docker/daemon_entrypoint.sh"), BIN_DIR)
+ symlink_file_into_dir(os.path.join(IMPALA_HOME, "bin/graceful_shutdown_backends.sh"),
+ BIN_DIR)
diff --git a/docker/utility_entrypoint.sh b/docker/utility_entrypoint.sh
new file mode 100755
index 0000000..0fd8785
--- /dev/null
+++ b/docker/utility_entrypoint.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Wrapper script that runs the command-line provided as its arguments after
+# setting up the environment required for utilities like impala-profile-tool
+# to run.
+
+export IMPALA_HOME=/opt/impala
+
+# Add directories containing dynamic libraries required by the utilities that
+# are not on the system library paths.
+export LD_LIBRARY_PATH=/opt/impala/lib
+LD_LIBRARY_PATH+=:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/
+LD_LIBRARY_PATH+=:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server/
+
+echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
+
+# Set ulimit core file size 0.
+ulimit -c 0
+
+exec "$@"