You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/02/05 12:23:30 UTC

[impala] 01/02: IMPALA-10389: impala-profile-tool container

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 93d4348b543a74248af9a64757fffe3d9a158648
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Mon Feb 1 09:31:37 2021 -0800

    IMPALA-10389: impala-profile-tool container
    
    Add a build step for an impala-profile-tool docker image
    that makes it easy to run the binary on any system.
    
    This container is automatically built as part of the
    docker build.
    
    This sets up a new build context that doesn't pull in all of
    the same dependencies or depend on the Java build.
    
    Testing:
    
      cat logs/cluster/profiles/* | \
        docker run -i impala_profile_tool
    
    I uploaded a build of the container to dockerhub too:
    
      timgarmstrong/impala_profile_tool
    
    Change-Id: I36915cd686ab930dcc934bc0c81bff8c16d46714
    Reviewed-on: http://gerrit.cloudera.org:8080/17015
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 docker/CMakeLists.txt                 |  52 +++++++++++++---
 docker/impala_profile_tool/Dockerfile |  57 ++++++++++++++++++
 docker/setup_build_context.py         | 110 +++++++++++++++++++++-------------
 docker/utility_entrypoint.sh          |  36 +++++++++++
 4 files changed, 206 insertions(+), 49 deletions(-)

diff --git a/docker/CMakeLists.txt b/docker/CMakeLists.txt
index 7fe085b..23efee8 100644
--- a/docker/CMakeLists.txt
+++ b/docker/CMakeLists.txt
@@ -19,6 +19,10 @@ set(IMPALA_BASE_BUILD_CONTEXT_DIR
   ${CMAKE_SOURCE_DIR}/docker/build_context
 )
 
+set(IMPALA_UTILITY_BUILD_CONTEXT_DIR
+  ${CMAKE_SOURCE_DIR}/docker/build_context_utility
+)
+
 set(DOCKER_BUILD ${CMAKE_SOURCE_DIR}/docker/docker-build.sh)
 
 find_program(LSB_RELEASE_EXEC lsb_release)
@@ -79,7 +83,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
   add_custom_target(docker_images)
   add_custom_target(docker_debug_images)
 
-  set(daemon_image_names "")
+  set(exported_image_names "")
 
   # Add a target with name 'target' to build a daemon image for the daemon with
   # name 'daemon_name', e.g. "impalad_executor". The image is tagged as 'image_name'.
@@ -98,7 +102,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
       COMMENT "Building ${image_name} docker image."
       VERBATIM
     )
-    set(daemon_image_names "${daemon_image_names} ${image_name}" PARENT_SCOPE)
+    set(exported_image_names "${exported_image_names} ${image_name}" PARENT_SCOPE)
   endfunction()
 
   # Add debug and release docker image targets for the given daemon e.g. if called
@@ -113,7 +117,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
     add_daemon_docker_image(${debug_target} ${daemon_name} ${debug_image} debug)
     ADD_DEPENDENCIES(docker_images ${release_target})
     ADD_DEPENDENCIES(docker_debug_images ${debug_target})
-    set(daemon_image_names "${daemon_image_names} ${release_image}" PARENT_SCOPE)
+    set(exported_image_names "${exported_image_names} ${release_image}" PARENT_SCOPE)
   endfunction()
 
   # Stamp out image targets for all of the Impala daemons.
@@ -124,9 +128,6 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
   add_daemon_docker_images(statestored)
   add_daemon_docker_images(admissiond)
 
-  # Generate a text file with all of the release daemon images.
-  file(WRITE ${CMAKE_SOURCE_DIR}/docker/docker-images.txt "${daemon_image_names}")
-
   # HMS quickstart image, which requires Hive and Hadoop builds.
   set(quickstart_hms_build_dir ${CMAKE_SOURCE_DIR}/docker/quickstart_hms)
   add_custom_target(quickstart_hms_build_setup
@@ -155,5 +156,42 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
     COMMENT "Building quickstart client docker image."
     VERBATIM
   )
-endif()
 
+  # Add target 'profile_tool_target' that builds the utility docker image 'profile_tool_image'
+  # for 'build_type'. 'build_context_args' are passed to the setup_build_context.py script.
+  function(add_utility_images profile_tool_target profile_tool_image build_type build_context_args)
+    # The utility build context depends on the impala-profile-tool binary and the setup
+    # script. Sending the whole impala workspace including test binaries, testdata, etc
+    # to the docker daemon can be very expensive, so we create a build context
+    # with symlinks.
+    add_custom_target(impala_utility_build_context_${build_type}
+      COMMAND ${CMAKE_SOURCE_DIR}/docker/setup_build_context.py ${build_context_args} --utility-context
+      DEPENDS impala-profile-tool ${CMAKE_SOURCE_DIR}/docker/setup_build_context.py
+      COMMENT "Creating impala utility build context build_type=${build_type}."
+      VERBATIM
+    )
+    # Target for the Impala profile tool image.
+    add_custom_target(${profile_tool_target}
+      # Tar up the build context (dereferencing symlinks) plus docker/impala_profile_tool/
+      # and stream it to docker-build.sh on stdin so the Dockerfile can COPY dependencies.
+      COMMAND tar cvh . -C ${CMAKE_SOURCE_DIR}/docker/impala_profile_tool/ . |
+              ${DOCKER_BUILD} -t ${profile_tool_image}
+                  --build-arg BASE_IMAGE=${DISTRO_BASE_IMAGE} -
+      WORKING_DIRECTORY ${IMPALA_UTILITY_BUILD_CONTEXT_DIR}/${build_type}
+      DEPENDS impala_utility_build_context_${build_type} ${CMAKE_SOURCE_DIR}/docker/impala_profile_tool/Dockerfile
+      DEPENDS ${CMAKE_SOURCE_DIR}/docker/utility_entrypoint.sh
+      COMMENT "Building Impala profile tool docker image build_type=${build_type}."
+      VERBATIM
+    )
+    # Export the image name (not the CMake target name) for docker-images.txt.
+    set(exported_image_names "${exported_image_names} ${profile_tool_image}" PARENT_SCOPE)
+  endfunction()
+  add_utility_images(impala_profile_tool_image impala_profile_tool release "")
+  add_utility_images(impala_profile_tool_debug_image impala_profile_tool_debug debug "--debug-build")
+
+  ADD_DEPENDENCIES(docker_images impala_profile_tool_image)
+  ADD_DEPENDENCIES(docker_debug_images impala_profile_tool_debug_image)
+
+  # Generate a text file with all of the exported image names (daemons and utilities).
+  file(WRITE ${CMAKE_SOURCE_DIR}/docker/docker-images.txt "${exported_image_names}")
+endif()
diff --git a/docker/impala_profile_tool/Dockerfile b/docker/impala_profile_tool/Dockerfile
new file mode 100644
index 0000000..3a55b85
--- /dev/null
+++ b/docker/impala_profile_tool/Dockerfile
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ARG BASE_IMAGE=ubuntu:16.04
+FROM ${BASE_IMAGE}
+
+# Install dependencies required for Impala utility binaries to run, plus
+# some useful utilities.
+# TODO: ideally we wouldn't depend on the JVM libraries, but currently the JNI code
+# in be/ is not cleanly separated from the code that doesn't use JNI.
+RUN apt-get update && \
+  apt-get install -y openjdk-8-jre-headless \
+  libsasl2-2 libsasl2-modules libsasl2-modules-gssapi-mit \
+  sudo netcat-openbsd less curl iproute2 vim iputils-ping \
+  krb5-user && \
+  apt-get clean && \
+  rm -rf /var/lib/apt/lists/*
+
+# Use a non-privileged impala user to run the processes in the container.
+# That user should own everything in the /opt/impala subdirectory.
+RUN groupadd -r impala -g 1000 && useradd --no-log-init -r -u 1000 -g 1000 impala && \
+    mkdir -p /opt/impala && chown impala /opt/impala && \
+    chmod ugo+w /etc/passwd
+USER impala
+
+# Copy build artifacts required for the utilities.
+# Need to have multiple copy commands to preserve directory structure.
+COPY --chown=impala bin /opt/impala/bin
+COPY --chown=impala lib /opt/impala/lib
+
+WORKDIR /opt/impala/
+
+ENTRYPOINT ["/opt/impala/bin/utility_entrypoint.sh", "/opt/impala/bin/impala-profile-tool",\
+     "-logtostderr"]
+
+LABEL name="Apache Impala Profile Tool" \
+      description="Tool for working with Impala profiles." \
+      # Common labels. NOTE(review): no ARG in this Dockerfile declares these variables; confirm.
+      org.label-schema.maintainer=$MAINTAINER \
+      org.label-schema.url=$URL \
+      org.label-schema.vcs-ref=$VCS_REF \
+      org.label-schema.vcs-type=$VCS_TYPE \
+      org.label-schema.vcs-url=$VCS_URL \
+      org.label-schema.version=$VERSION
diff --git a/docker/setup_build_context.py b/docker/setup_build_context.py
index c1164e3..037ed65 100755
--- a/docker/setup_build_context.py
+++ b/docker/setup_build_context.py
@@ -29,6 +29,9 @@ from subprocess import check_call
 parser = argparse.ArgumentParser()
 parser.add_argument("--debug-build", help="Setup build context for debug build",
                     action="store_true")
+parser.add_argument("--utility-context",
+                    help="Setup utility build context instead of daemon",
+                    action="store_true")
 args = parser.parse_args()
 
 IMPALA_HOME = os.environ["IMPALA_HOME"]
@@ -36,7 +39,10 @@ if args.debug_build:
   BUILD_TYPE = "debug"
 else:
   BUILD_TYPE = "release"
-OUTPUT_DIR = os.path.join(IMPALA_HOME, "docker/build_context", BUILD_TYPE)
+if args.utility_context:
+  OUTPUT_DIR = os.path.join(IMPALA_HOME, "docker/build_context_utility", BUILD_TYPE)
+else:
+  OUTPUT_DIR = os.path.join(IMPALA_HOME, "docker/build_context", BUILD_TYPE)
 
 IMPALA_TOOLCHAIN_PACKAGES_HOME = os.environ["IMPALA_TOOLCHAIN_PACKAGES_HOME"]
 IMPALA_GCC_VERSION = os.environ["IMPALA_GCC_VERSION"]
@@ -66,10 +72,16 @@ LIB_DIR = os.path.join(OUTPUT_DIR, "lib")
 # The statestore does not require any jar files since it does not run an embedded JVM.
 STATESTORE_LIB_DIR = os.path.join(OUTPUT_DIR, "statestore-lib")
 
+# We generate multiple library directories for the build context for daemons,
+# but only a single one for the utility build context.
+if args.utility_context:
+  TARGET_LIB_DIRS = [LIB_DIR]
+else:
+  TARGET_LIB_DIRS = [LIB_DIR, EXEC_LIB_DIR, STATESTORE_LIB_DIR]
+
 os.mkdir(BIN_DIR)
-os.mkdir(EXEC_LIB_DIR)
-os.mkdir(LIB_DIR)
-os.mkdir(STATESTORE_LIB_DIR)
+for lib_dir in TARGET_LIB_DIRS:
+  os.mkdir(lib_dir)
 
 
 def symlink_file_into_dir(src_file, dst_dir):
@@ -92,20 +104,29 @@ def strip_debug_symbols(src_file, dst_dirs):
 
 # Impala binaries and native dependencies.
 
+
 # Strip debug symbols from release build to reduce image size. Keep them for
 # debug build.
-IMPALAD_BINARY = os.path.join(IMPALA_HOME, "be/build", BUILD_TYPE, "service/impalad")
-if args.debug_build:
-  symlink_file_into_dir(IMPALAD_BINARY, BIN_DIR)
+if args.utility_context:
+  PROFILE_TOOL_BINARY = os.path.join(
+      IMPALA_HOME, "be/build", BUILD_TYPE, "util/impala-profile-tool")
+  if args.debug_build:
+    symlink_file_into_dir(PROFILE_TOOL_BINARY, BIN_DIR)
+  else:
+    strip_debug_symbols(PROFILE_TOOL_BINARY, [BIN_DIR])
 else:
-  strip_debug_symbols(IMPALAD_BINARY, [BIN_DIR])
+  IMPALAD_BINARY = os.path.join(IMPALA_HOME, "be/build", BUILD_TYPE, "service/impalad")
+  if args.debug_build:
+    symlink_file_into_dir(IMPALAD_BINARY, BIN_DIR)
+  else:
+    strip_debug_symbols(IMPALAD_BINARY, [BIN_DIR])
 
 # Add libstc++ binaries to LIB_DIR. Strip debug symbols for release builds.
 for libstdcpp_so in glob.glob(os.path.join(
     GCC_HOME, "lib64/{0}*.so*".format("libstdc++"))):
   # Ignore 'libstdc++.so.*-gdb.py'.
   if not os.path.basename(libstdcpp_so).endswith(".py"):
-    dst_dirs = [LIB_DIR, EXEC_LIB_DIR, STATESTORE_LIB_DIR]
+    dst_dirs = TARGET_LIB_DIRS
     if args.debug_build:
       symlink_file_into_dirs(libstdcpp_so, dst_dirs)
     else:
@@ -113,44 +134,49 @@ for libstdcpp_so in glob.glob(os.path.join(
 
 # Add libgcc binaries to LIB_DIR.
 for libgcc_so in glob.glob(os.path.join(GCC_HOME, "lib64/{0}*.so*".format("libgcc_s"))):
-  symlink_file_into_dirs(libgcc_so, [LIB_DIR, EXEC_LIB_DIR, STATESTORE_LIB_DIR])
+  symlink_file_into_dirs(libgcc_so, TARGET_LIB_DIRS)
 
 # Add libkudu_client binaries to LIB_DIR. Strip debug symbols for release builds.
 for kudu_client_so in glob.glob(os.path.join(KUDU_LIB_DIR, "libkudu_client.so*")):
-  # For some reason, statestored requires libkudu_client.so.
-  dst_dirs = [LIB_DIR, EXEC_LIB_DIR, STATESTORE_LIB_DIR]
+  # All backend binaries currently link against libkudu_client.so even if they don't need
+  # them.
+  dst_dirs = TARGET_LIB_DIRS
   if args.debug_build:
     symlink_file_into_dirs(kudu_client_so, dst_dirs)
   else:
     strip_debug_symbols(kudu_client_so, dst_dirs)
 
-# Impala Coordinator dependencies.
-dep_classpath = file(os.path.join(IMPALA_HOME, "fe/target/build-classpath.txt")).read()
-for jar in dep_classpath.split(":"):
-  assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
-  symlink_file_into_dir(jar, LIB_DIR)
-
-# Impala Coordinator jars.
-num_frontend_jars = 0
-for jar in glob.glob(os.path.join(IMPALA_HOME, "fe/target/impala-frontend-*.jar")):
-  # Ignore the tests jar
-  if jar.find("-tests") != -1:
-    continue
-  symlink_file_into_dir(jar, LIB_DIR)
-  num_frontend_jars += 1
-# There must be exactly one impala-frontend jar.
-assert num_frontend_jars == 1
-
-# Impala Executor dependencies.
-dep_classpath = file(os.path.join(IMPALA_HOME,
-    "java/executor-deps/target/build-executor-deps-classpath.txt")).read()
-for jar in dep_classpath.split(":"):
-  assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
-  symlink_file_into_dir(jar, EXEC_LIB_DIR)
-
-# Templates for debug web pages.
-os.symlink(os.path.join(IMPALA_HOME, "www"), os.path.join(OUTPUT_DIR, "www"))
-# Scripts
-symlink_file_into_dir(os.path.join(IMPALA_HOME, "docker/daemon_entrypoint.sh"), BIN_DIR)
-symlink_file_into_dir(os.path.join(IMPALA_HOME, "bin/graceful_shutdown_backends.sh"),
-                      BIN_DIR)
+if args.utility_context:
+  symlink_file_into_dir(
+      os.path.join(IMPALA_HOME, "docker/utility_entrypoint.sh"), BIN_DIR)
+else:
+  # Impala Coordinator dependencies.
+  dep_classpath = file(os.path.join(IMPALA_HOME, "fe/target/build-classpath.txt")).read()
+  for jar in dep_classpath.split(":"):
+    assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
+    symlink_file_into_dir(jar, LIB_DIR)
+
+  # Impala Coordinator jars.
+  num_frontend_jars = 0
+  for jar in glob.glob(os.path.join(IMPALA_HOME, "fe/target/impala-frontend-*.jar")):
+    # Ignore the tests jar
+    if jar.find("-tests") != -1:
+      continue
+    symlink_file_into_dir(jar, LIB_DIR)
+    num_frontend_jars += 1
+  # There must be exactly one impala-frontend jar.
+  assert num_frontend_jars == 1
+
+  # Impala Executor dependencies.
+  dep_classpath = file(os.path.join(IMPALA_HOME,
+      "java/executor-deps/target/build-executor-deps-classpath.txt")).read()
+  for jar in dep_classpath.split(":"):
+    assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
+    symlink_file_into_dir(jar, EXEC_LIB_DIR)
+
+  # Templates for debug web pages.
+  os.symlink(os.path.join(IMPALA_HOME, "www"), os.path.join(OUTPUT_DIR, "www"))
+  # Scripts
+  symlink_file_into_dir(os.path.join(IMPALA_HOME, "docker/daemon_entrypoint.sh"), BIN_DIR)
+  symlink_file_into_dir(os.path.join(IMPALA_HOME, "bin/graceful_shutdown_backends.sh"),
+                        BIN_DIR)
diff --git a/docker/utility_entrypoint.sh b/docker/utility_entrypoint.sh
new file mode 100755
index 0000000..0fd8785
--- /dev/null
+++ b/docker/utility_entrypoint.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Entrypoint wrapper for utility containers: prepares the environment that
+# binaries such as impala-profile-tool expect, then replaces itself with the
+# command given as arguments.
+
+export IMPALA_HOME=/opt/impala
+
+# Search the bundled libraries first, then the OpenJDK 8 JRE library
+# directories, none of which are on the system library paths.
+jre_lib_dir=/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64
+export LD_LIBRARY_PATH="/opt/impala/lib:${jre_lib_dir}/:${jre_lib_dir}/server/"
+
+printf 'LD_LIBRARY_PATH: %s\n' "$LD_LIBRARY_PATH"
+
+# Disable core dumps by limiting the core file size to zero.
+ulimit -c 0
+
+# Hand control to the requested command without leaving a shell process behind.
+exec "$@"