You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/02/05 12:23:29 UTC

[impala] branch master updated (e71ea69 -> 0473e1b)

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from e71ea69  IMPALA-10459: Remove workarounds for MAPREDUCE-6441
     new 93d4348  IMPALA-10389: impala-profile-tool container
     new 0473e1b  IMPALA-10473: Fix wrong analytic results on constant partition/order by exprs

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docker/CMakeLists.txt                              |  52 ++++++--
 .../Dockerfile                                     |  31 ++---
 docker/setup_build_context.py                      | 110 +++++++++-------
 bin/run-binary.sh => docker/utility_entrypoint.sh  |  21 ++--
 .../apache/impala/planner/AnalyticEvalNode.java    |  13 ++
 .../apache/impala/planner/DistributedPlanner.java  |   5 +-
 .../queries/PlannerTest/analytic-fns.test          | 139 ++++++++++++++++++++-
 .../queries/QueryTest/analytic-fns.test            |  19 +++
 8 files changed, 311 insertions(+), 79 deletions(-)
 copy docker/{impala_base => impala_profile_tool}/Dockerfile (64%)
 copy bin/run-binary.sh => docker/utility_entrypoint.sh (58%)


[impala] 02/02: IMPALA-10473: Fix wrong analytic results on constant partition/order by exprs

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0473e1b973b18c3ad8845a99fb1b1e70bc7c70fc
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Thu Feb 4 17:22:13 2021 +0800

    IMPALA-10473: Fix wrong analytic results on constant partition/order by exprs
    
    When the Partition-by and Order-by expressions of an analytic are all
    constants, it should be evaluated in a single unpartitioned fragment
    (same as analytics that have no Partition-by/Order-by exprs). Currently,
    it's placed in the same fragment as the child node, which causes it to
    be computed locally and produce incorrect results when the fragment is
    partitioned.
    
    Tests:
     - Added planner tests
     - Added e2e tests
    
    Change-Id: Ibc88a410dab984ff37e27dc635bee5f289003a2a
    Reviewed-on: http://gerrit.cloudera.org:8080/17023
    Reviewed-by: Aman Sinha <am...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../apache/impala/planner/AnalyticEvalNode.java    |  13 ++
 .../apache/impala/planner/DistributedPlanner.java  |   5 +-
 .../queries/PlannerTest/analytic-fns.test          | 139 ++++++++++++++++++++-
 .../queries/QueryTest/analytic-fns.test            |  19 +++
 4 files changed, 171 insertions(+), 5 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/AnalyticEvalNode.java b/fe/src/main/java/org/apache/impala/planner/AnalyticEvalNode.java
index 70d5cba..7f8b0b5 100644
--- a/fe/src/main/java/org/apache/impala/planner/AnalyticEvalNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/AnalyticEvalNode.java
@@ -115,6 +115,19 @@ public class AnalyticEvalNode extends PlanNode {
   public List<Expr> getPartitionExprs() { return partitionExprs_; }
   public List<OrderByElement> getOrderByElements() { return orderByElements_; }
 
+  /**
+   * Returns whether this node should be computed in a single unpartitioned fragment.
+   * True when Partition-By and Order-By exprs are all empty or constant.
+   */
+  public boolean requiresUnpartitionedEval() {
+    // false when any Partition-By/Order-By exprs are non-constant
+    if (!Expr.allConstant(partitionExprs_)) return false;
+    for (OrderByElement orderBy : orderByElements_) {
+      if (!orderBy.getExpr().isConstant()) return false;
+    }
+    return true;
+  }
+
   @Override
   public void init(Analyzer analyzer) {
     Preconditions.checkState(conjuncts_.isEmpty());
diff --git a/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java b/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java
index 9f7456d..3867d21 100644
--- a/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java
@@ -1060,10 +1060,7 @@ public class DistributedPlanner {
         node instanceof SortNode || node instanceof AnalyticEvalNode);
     if (node instanceof AnalyticEvalNode) {
       AnalyticEvalNode analyticNode = (AnalyticEvalNode) node;
-      if (analyticNode.getPartitionExprs().isEmpty()
-          && analyticNode.getOrderByElements().isEmpty()) {
-        // no Partition-By/Order-By exprs: compute analytic exprs in single
-        // unpartitioned fragment
+      if (analyticNode.requiresUnpartitionedEval()) {
         PlanFragment fragment = childFragment;
         if (childFragment.isPartitioned()) {
           fragment = createParentFragment(childFragment, DataPartition.UNPARTITIONED);
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
index 845b876..aaa9606 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
@@ -3181,7 +3181,7 @@ SELECT MIN(n_nationkey) OVER (PARTITION BY n_regionkey)
 FROM functional.alltypes t1
     LEFT JOIN tpch_kudu.nation t2 ON t1.id = t2.n_nationkey
 WHERE t2.n_name IN ('ALGERIA', 'ARGENTINA')
---- PLAN
+---- PLAN
 PLAN-ROOT SINK
 |
 04:ANALYTIC
@@ -3206,3 +3206,140 @@ PLAN-ROOT SINK
    HDFS partitions=24/24 files=24 size=478.45KB
    row-size=4B cardinality=7.30K
 ====
+# IMPALA-10473: AnalyticEvalNodes with constant Partition-by and Order-by exprs should be
+# computed in single unpartitioned fragments. In this case, it's placed in the coordinator
+# fragment which contains PlanRootSink.
+select row_number() over (order by 'a') from functional.alltypes
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:ANALYTIC
+|  functions: row_number()
+|  order by: 'a' ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=8B cardinality=7.30K
+|
+02:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HDFS [functional.alltypes]
+   HDFS partitions=24/24 files=24 size=478.45KB
+   row-size=0B cardinality=7.30K
+====
+# IMPALA-10473: AnalyticEvalNodes with constant Partition-by and Order-by exprs should be
+# computed in single unpartitioned fragments. In this case, it's placed in the coordinator
+# fragment which contains PlanRootSink.
+select row_number() over (partition by cast('5' as int) order by 1+3)
+from functional.alltypes
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:ANALYTIC
+|  functions: row_number()
+|  partition by: 5
+|  order by: 4 ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=8B cardinality=7.30K
+|
+02:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HDFS [functional.alltypes]
+   HDFS partitions=24/24 files=24 size=478.45KB
+   row-size=0B cardinality=7.30K
+====
+# IMPALA-10473: AnalyticEvalNodes with constant Partition-by and Order-by exprs should be
+# computed in single unpartitioned fragments. In this case, row_number() is placed in the
+# coordinator fragment which contains PlanRootSink. count() has non-constant Partition-by
+# expr so is placed in a fragment that can be partitioned.
+select row_number() over (order by concat('ab', '')),
+  count() over (partition by int_col order by concat('a', 'b'))
+from functional.alltypes
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+03:ANALYTIC
+|  functions: row_number()
+|  order by: 'ab' ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=20B cardinality=7.30K
+|
+05:EXCHANGE [UNPARTITIONED]
+|
+02:ANALYTIC
+|  functions: count()
+|  partition by: int_col
+|  order by: 'ab' ASC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=7.30K
+|
+01:SORT
+|  order by: int_col ASC NULLS LAST, 'ab' ASC
+|  row-size=4B cardinality=7.30K
+|
+04:EXCHANGE [HASH(int_col)]
+|
+00:SCAN HDFS [functional.alltypes]
+   HDFS partitions=24/24 files=24 size=478.45KB
+   row-size=4B cardinality=7.30K
+====
+# IMPALA-10473: AnalyticEvalNodes with constant Partition-by and Order-by exprs should be
+# computed in single unpartitioned fragments. In this case, it's placed in the coordinator
+# fragment which contains PlanRootSink.
+with v as (
+  select row_number() over (order by 'a') as rn from functional.alltypes
+)
+select count(distinct rn) from v
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+03:AGGREGATE [FINALIZE]
+|  output: count(rn)
+|  row-size=8B cardinality=1
+|
+02:AGGREGATE
+|  group by: row_number()
+|  row-size=8B cardinality=1
+|
+01:ANALYTIC
+|  functions: row_number()
+|  order by: 'a' ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=8B cardinality=7.30K
+|
+04:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HDFS [functional.alltypes]
+   HDFS partitions=24/24 files=24 size=478.45KB
+   row-size=0B cardinality=7.30K
+====
+# IMPALA-10473: AnalyticEvalNodes with constant Partition-by and Order-by exprs should be
+# computed in single unpartitioned fragments. In this case, it's placed in the coordinator
+# fragment which contains PlanRootSink.
+with v as (
+  select row_number() over (partition by cast('1' as int) order by concat('a', 'b')) as rn
+  from functional.alltypes
+)
+select count(distinct rn) from v
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+03:AGGREGATE [FINALIZE]
+|  output: count(rn)
+|  row-size=8B cardinality=1
+|
+02:AGGREGATE
+|  group by: row_number()
+|  row-size=8B cardinality=1
+|
+01:ANALYTIC
+|  functions: row_number()
+|  partition by: 1
+|  order by: 'ab' ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=8B cardinality=7.30K
+|
+04:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HDFS [functional.alltypes]
+   HDFS partitions=24/24 files=24 size=478.45KB
+   row-size=0B cardinality=7.30K
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
index 45feae0..aa10e1b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
@@ -2194,3 +2194,22 @@ TINYINT
 0
 0
 ====
+---- QUERY
+# IMPALA-10473: AnalyticEvalNodes with constant Partition-by and Order-by exprs should be
+# computed in single unpartitioned fragments.
+# Note that this test is run with "set exec_single_node_rows_threshold=0" which is in the
+# default test dimension. Without this setting, we need a larger table to cover the bug.
+select row_number() over (order by 'a'), count() over (order by 0)
+from alltypestiny
+---- TYPES
+BIGINT,BIGINT
+---- RESULTS
+1,8
+2,8
+3,8
+4,8
+5,8
+6,8
+7,8
+8,8
+====


[impala] 01/02: IMPALA-10389: impala-profile-tool container

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 93d4348b543a74248af9a64757fffe3d9a158648
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Mon Feb 1 09:31:37 2021 -0800

    IMPALA-10389: impala-profile-tool container
    
    Add a build step for an impala-profile-tool docker image
    that makes it easy to run the binary on any system.
    
    This container is automatically built as part of the
    docker build.
    
    This sets up a new build context that doesn't pull in all of
    the same dependencies or depend on the Java build.
    
    Testing:
    
      cat logs/cluster/profiles/* | \
        docker run -i impala_profile_tool
    
    I uploaded a build of the container to dockerhub too:
    
      timgarmstrong/impala_profile_tool
    
    Change-Id: I36915cd686ab930dcc934bc0c81bff8c16d46714
    Reviewed-on: http://gerrit.cloudera.org:8080/17015
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 docker/CMakeLists.txt                 |  52 +++++++++++++---
 docker/impala_profile_tool/Dockerfile |  57 ++++++++++++++++++
 docker/setup_build_context.py         | 110 +++++++++++++++++++++-------------
 docker/utility_entrypoint.sh          |  36 +++++++++++
 4 files changed, 206 insertions(+), 49 deletions(-)

diff --git a/docker/CMakeLists.txt b/docker/CMakeLists.txt
index 7fe085b..23efee8 100644
--- a/docker/CMakeLists.txt
+++ b/docker/CMakeLists.txt
@@ -19,6 +19,10 @@ set(IMPALA_BASE_BUILD_CONTEXT_DIR
   ${CMAKE_SOURCE_DIR}/docker/build_context
 )
 
+set(IMPALA_UTILITY_BUILD_CONTEXT_DIR
+  ${CMAKE_SOURCE_DIR}/docker/build_context_utility
+)
+
 set(DOCKER_BUILD ${CMAKE_SOURCE_DIR}/docker/docker-build.sh)
 
 find_program(LSB_RELEASE_EXEC lsb_release)
@@ -79,7 +83,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
   add_custom_target(docker_images)
   add_custom_target(docker_debug_images)
 
-  set(daemon_image_names "")
+  set(exported_image_names "")
 
   # Add a target with name 'target' to build a daemon image for the daemon with
   # name 'daemon_name', e.g. "impalad_executor". The image is tagged as 'image_name'.
@@ -98,7 +102,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
       COMMENT "Building ${image_name} docker image."
       VERBATIM
     )
-    set(daemon_image_names "${daemon_image_names} ${image_name}" PARENT_SCOPE)
+    set(exported_image_names "${exported_image_names} ${image_name}" PARENT_SCOPE)
   endfunction()
 
   # Add debug and release docker image targets for the given daemon e.g. if called
@@ -113,7 +117,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
     add_daemon_docker_image(${debug_target} ${daemon_name} ${debug_image} debug)
     ADD_DEPENDENCIES(docker_images ${release_target})
     ADD_DEPENDENCIES(docker_debug_images ${debug_target})
-    set(daemon_image_names "${daemon_image_names} ${release_image}" PARENT_SCOPE)
+    set(exported_image_names "${exported_image_names} ${release_image}" PARENT_SCOPE)
   endfunction()
 
   # Stamp out image targets for all of the Impala daemons.
@@ -124,9 +128,6 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
   add_daemon_docker_images(statestored)
   add_daemon_docker_images(admissiond)
 
-  # Generate a text file with all of the release daemon images.
-  file(WRITE ${CMAKE_SOURCE_DIR}/docker/docker-images.txt "${daemon_image_names}")
-
   # HMS quickstart image, which requires Hive and Hadoop builds.
   set(quickstart_hms_build_dir ${CMAKE_SOURCE_DIR}/docker/quickstart_hms)
   add_custom_target(quickstart_hms_build_setup
@@ -155,5 +156,42 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
     COMMENT "Building quickstart client docker image."
     VERBATIM
   )
-endif()
 
+  # Add a target to build utility docker images for 'build_type'. 'build_context_args' are
+  # passed to the setup_build_context.py script.
+  function(add_utility_images profile_tool_target profile_tool_image build_type build_context_args)
+    # Build context depends on the impala-profile-tool binary and the setup script.
+    # Sending the whole impala workspace including test binaries, testdata, etc
+    # to the docker daemon can be very expensive, so we create a build context
+    # with symlinks
+    add_custom_target(impala_utility_build_context_${build_type}
+      COMMAND ${CMAKE_SOURCE_DIR}/docker/setup_build_context.py ${build_context_args} --utility-context
+      DEPENDS impala-profile-tool ${CMAKE_SOURCE_DIR}/docker/setup_build_context.py
+      COMMENT "Creating impala utility build context build_type=${build_type}."
+      VERBATIM
+    )
+    # Target for the Impala profile tool image.
+    add_custom_target(${profile_tool_target}
+      # Run docker build inside the build context directory so that all dependencies are
+      # sent to the docker daemon. This allows the Dockerfile build to copy all necessary
+      # dependencies.
+      COMMAND tar cvh . -C ${CMAKE_SOURCE_DIR}/docker/impala_profile_tool/ . |
+              ${DOCKER_BUILD} -t ${profile_tool_image}
+                  --build-arg BASE_IMAGE=${DISTRO_BASE_IMAGE} -
+      WORKING_DIRECTORY ${IMPALA_UTILITY_BUILD_CONTEXT_DIR}/${build_type}
+      DEPENDS impala_utility_build_context_${build_type} ${CMAKE_SOURCE_DIR}/docker/impala_profile_tool/Dockerfile
+      DEPENDS ${CMAKE_SOURCE_DIR}/docker/utility_entrypoint.sh
+      COMMENT "Building Impala profile tool docker image build_type=${build_type}."
+      VERBATIM
+    )
+    set(exported_image_names "${exported_image_names} ${profile_tool_target}" PARENT_SCOPE)
+  endfunction()
+  add_utility_images(impala_profile_tool_image impala_profile_tool release "")
+  add_utility_images(impala_profile_tool_debug_image impala_profile_tool_debug debug "--debug-build")
+
+  ADD_DEPENDENCIES(docker_images impala_profile_tool_image)
+  ADD_DEPENDENCIES(docker_debug_images impala_profile_tool_debug_image)
+
+  # Generate a text file with all of the exported image names.
+  file(WRITE ${CMAKE_SOURCE_DIR}/docker/docker-images.txt "${exported_image_names}")
+endif()
diff --git a/docker/impala_profile_tool/Dockerfile b/docker/impala_profile_tool/Dockerfile
new file mode 100644
index 0000000..3a55b85
--- /dev/null
+++ b/docker/impala_profile_tool/Dockerfile
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ARG BASE_IMAGE=ubuntu:16.04
+FROM ${BASE_IMAGE}
+
+# Install dependencies required for Impala utility binaries to run, plus
+# some useful utilities.
+# TODO: ideally we wouldn't depend on the JVM libraries, but currently the JNI code
+# in be/ is not cleanly separated from the code that doesn't use JNI.
+RUN apt-get update && \
+  apt-get install -y openjdk-8-jre-headless \
+  libsasl2-2 libsasl2-modules libsasl2-modules-gssapi-mit \
+  sudo netcat-openbsd less curl iproute2 vim iputils-ping \
+  krb5-user && \
+  apt-get clean && \
+  rm -rf /var/lib/apt/lists/*
+
+# Use a non-privileged impala user to run the processes in the container.
+# That user should own everything in the /opt/impala subdirectory.
+RUN groupadd -r impala -g 1000 && useradd --no-log-init -r -u 1000 -g 1000 impala && \
+    mkdir -p /opt/impala && chown impala /opt/impala && \
+    chmod ugo+w /etc/passwd
+USER impala
+
+# Copy build artifacts required for the utilities.
+# Need to have multiple copy commands to preserve directory structure.
+COPY --chown=impala bin /opt/impala/bin
+COPY --chown=impala lib /opt/impala/lib
+
+WORKDIR /opt/impala/
+
+ENTRYPOINT ["/opt/impala/bin/utility_entrypoint.sh", "/opt/impala/bin/impala-profile-tool",\
+     "-logtostderr"]
+
+LABEL name="Apache Impala Profile Tool" \
+      description="Tool for working with Impala profiles." \
+      # Common labels.
+      org.label-schema.maintainer=$MAINTAINER \
+      org.label-schema.url=$URL \
+      org.label-schema.vcs-ref=$VCS_REF \
+      org.label-schema.vcs-type=$VCS_TYPE \
+      org.label-schema.vcs-url=$VCS_URL \
+      org.label-schema.version=$VERSION
diff --git a/docker/setup_build_context.py b/docker/setup_build_context.py
index c1164e3..037ed65 100755
--- a/docker/setup_build_context.py
+++ b/docker/setup_build_context.py
@@ -29,6 +29,9 @@ from subprocess import check_call
 parser = argparse.ArgumentParser()
 parser.add_argument("--debug-build", help="Setup build context for debug build",
                     action="store_true")
+parser.add_argument("--utility-context",
+                    help="Setup utility build context instead of daemon",
+                    action="store_true")
 args = parser.parse_args()
 
 IMPALA_HOME = os.environ["IMPALA_HOME"]
@@ -36,7 +39,10 @@ if args.debug_build:
   BUILD_TYPE = "debug"
 else:
   BUILD_TYPE = "release"
-OUTPUT_DIR = os.path.join(IMPALA_HOME, "docker/build_context", BUILD_TYPE)
+if args.utility_context:
+  OUTPUT_DIR = os.path.join(IMPALA_HOME, "docker/build_context_utility", BUILD_TYPE)
+else:
+  OUTPUT_DIR = os.path.join(IMPALA_HOME, "docker/build_context", BUILD_TYPE)
 
 IMPALA_TOOLCHAIN_PACKAGES_HOME = os.environ["IMPALA_TOOLCHAIN_PACKAGES_HOME"]
 IMPALA_GCC_VERSION = os.environ["IMPALA_GCC_VERSION"]
@@ -66,10 +72,16 @@ LIB_DIR = os.path.join(OUTPUT_DIR, "lib")
 # The statestore does not require any jar files since it does not run an embedded JVM.
 STATESTORE_LIB_DIR = os.path.join(OUTPUT_DIR, "statestore-lib")
 
+# We generate multiple library directories for the build context for daemons,
+# but only a single one for the utility build context.
+if args.utility_context:
+  TARGET_LIB_DIRS = [LIB_DIR]
+else:
+  TARGET_LIB_DIRS = [LIB_DIR, EXEC_LIB_DIR, STATESTORE_LIB_DIR]
+
 os.mkdir(BIN_DIR)
-os.mkdir(EXEC_LIB_DIR)
-os.mkdir(LIB_DIR)
-os.mkdir(STATESTORE_LIB_DIR)
+for lib_dir in TARGET_LIB_DIRS:
+  os.mkdir(lib_dir)
 
 
 def symlink_file_into_dir(src_file, dst_dir):
@@ -92,20 +104,29 @@ def strip_debug_symbols(src_file, dst_dirs):
 
 # Impala binaries and native dependencies.
 
+
 # Strip debug symbols from release build to reduce image size. Keep them for
 # debug build.
-IMPALAD_BINARY = os.path.join(IMPALA_HOME, "be/build", BUILD_TYPE, "service/impalad")
-if args.debug_build:
-  symlink_file_into_dir(IMPALAD_BINARY, BIN_DIR)
+if args.utility_context:
+  PROFILE_TOOL_BINARY = os.path.join(
+      IMPALA_HOME, "be/build", BUILD_TYPE, "util/impala-profile-tool")
+  if args.debug_build:
+    symlink_file_into_dir(PROFILE_TOOL_BINARY, BIN_DIR)
+  else:
+    strip_debug_symbols(PROFILE_TOOL_BINARY, [BIN_DIR])
 else:
-  strip_debug_symbols(IMPALAD_BINARY, [BIN_DIR])
+  IMPALAD_BINARY = os.path.join(IMPALA_HOME, "be/build", BUILD_TYPE, "service/impalad")
+  if args.debug_build:
+    symlink_file_into_dir(IMPALAD_BINARY, BIN_DIR)
+  else:
+    strip_debug_symbols(IMPALAD_BINARY, [BIN_DIR])
 
 # Add libstc++ binaries to LIB_DIR. Strip debug symbols for release builds.
 for libstdcpp_so in glob.glob(os.path.join(
     GCC_HOME, "lib64/{0}*.so*".format("libstdc++"))):
   # Ignore 'libstdc++.so.*-gdb.py'.
   if not os.path.basename(libstdcpp_so).endswith(".py"):
-    dst_dirs = [LIB_DIR, EXEC_LIB_DIR, STATESTORE_LIB_DIR]
+    dst_dirs = TARGET_LIB_DIRS
     if args.debug_build:
       symlink_file_into_dirs(libstdcpp_so, dst_dirs)
     else:
@@ -113,44 +134,49 @@ for libstdcpp_so in glob.glob(os.path.join(
 
 # Add libgcc binaries to LIB_DIR.
 for libgcc_so in glob.glob(os.path.join(GCC_HOME, "lib64/{0}*.so*".format("libgcc_s"))):
-  symlink_file_into_dirs(libgcc_so, [LIB_DIR, EXEC_LIB_DIR, STATESTORE_LIB_DIR])
+  symlink_file_into_dirs(libgcc_so, TARGET_LIB_DIRS)
 
 # Add libkudu_client binaries to LIB_DIR. Strip debug symbols for release builds.
 for kudu_client_so in glob.glob(os.path.join(KUDU_LIB_DIR, "libkudu_client.so*")):
-  # For some reason, statestored requires libkudu_client.so.
-  dst_dirs = [LIB_DIR, EXEC_LIB_DIR, STATESTORE_LIB_DIR]
+  # All backend binaries currently link against libkudu_client.so even if they don't need
+  # it.
+  dst_dirs = TARGET_LIB_DIRS
   if args.debug_build:
     symlink_file_into_dirs(kudu_client_so, dst_dirs)
   else:
     strip_debug_symbols(kudu_client_so, dst_dirs)
 
-# Impala Coordinator dependencies.
-dep_classpath = file(os.path.join(IMPALA_HOME, "fe/target/build-classpath.txt")).read()
-for jar in dep_classpath.split(":"):
-  assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
-  symlink_file_into_dir(jar, LIB_DIR)
-
-# Impala Coordinator jars.
-num_frontend_jars = 0
-for jar in glob.glob(os.path.join(IMPALA_HOME, "fe/target/impala-frontend-*.jar")):
-  # Ignore the tests jar
-  if jar.find("-tests") != -1:
-    continue
-  symlink_file_into_dir(jar, LIB_DIR)
-  num_frontend_jars += 1
-# There must be exactly one impala-frontend jar.
-assert num_frontend_jars == 1
-
-# Impala Executor dependencies.
-dep_classpath = file(os.path.join(IMPALA_HOME,
-    "java/executor-deps/target/build-executor-deps-classpath.txt")).read()
-for jar in dep_classpath.split(":"):
-  assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
-  symlink_file_into_dir(jar, EXEC_LIB_DIR)
-
-# Templates for debug web pages.
-os.symlink(os.path.join(IMPALA_HOME, "www"), os.path.join(OUTPUT_DIR, "www"))
-# Scripts
-symlink_file_into_dir(os.path.join(IMPALA_HOME, "docker/daemon_entrypoint.sh"), BIN_DIR)
-symlink_file_into_dir(os.path.join(IMPALA_HOME, "bin/graceful_shutdown_backends.sh"),
-                      BIN_DIR)
+if args.utility_context:
+  symlink_file_into_dir(
+      os.path.join(IMPALA_HOME, "docker/utility_entrypoint.sh"), BIN_DIR)
+else:
+  # Impala Coordinator dependencies.
+  dep_classpath = file(os.path.join(IMPALA_HOME, "fe/target/build-classpath.txt")).read()
+  for jar in dep_classpath.split(":"):
+    assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
+    symlink_file_into_dir(jar, LIB_DIR)
+
+  # Impala Coordinator jars.
+  num_frontend_jars = 0
+  for jar in glob.glob(os.path.join(IMPALA_HOME, "fe/target/impala-frontend-*.jar")):
+    # Ignore the tests jar
+    if jar.find("-tests") != -1:
+      continue
+    symlink_file_into_dir(jar, LIB_DIR)
+    num_frontend_jars += 1
+  # There must be exactly one impala-frontend jar.
+  assert num_frontend_jars == 1
+
+  # Impala Executor dependencies.
+  dep_classpath = file(os.path.join(IMPALA_HOME,
+      "java/executor-deps/target/build-executor-deps-classpath.txt")).read()
+  for jar in dep_classpath.split(":"):
+    assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
+    symlink_file_into_dir(jar, EXEC_LIB_DIR)
+
+  # Templates for debug web pages.
+  os.symlink(os.path.join(IMPALA_HOME, "www"), os.path.join(OUTPUT_DIR, "www"))
+  # Scripts
+  symlink_file_into_dir(os.path.join(IMPALA_HOME, "docker/daemon_entrypoint.sh"), BIN_DIR)
+  symlink_file_into_dir(os.path.join(IMPALA_HOME, "bin/graceful_shutdown_backends.sh"),
+                        BIN_DIR)
diff --git a/docker/utility_entrypoint.sh b/docker/utility_entrypoint.sh
new file mode 100755
index 0000000..0fd8785
--- /dev/null
+++ b/docker/utility_entrypoint.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Wrapper script that runs the command-line provided as its arguments after
+# setting up the environment required for utilities like impala-profile-tool
+# to run.
+
+export IMPALA_HOME=/opt/impala
+
+# Add directories containing dynamic libraries required by the utilities that
+# are not on the system library paths.
+export LD_LIBRARY_PATH=/opt/impala/lib
+LD_LIBRARY_PATH+=:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/
+LD_LIBRARY_PATH+=:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server/
+
+echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
+
+# Set ulimit core file size 0.
+ulimit -c 0
+
+exec "$@"