You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@impala.apache.org by ta...@apache.org on 2021/02/10 07:19:49 UTC

[impala] branch master updated (1f7b413 -> 79bee3b)

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 1f7b413  IMPALA-8721: re-enable test_hive_impala_interop
     new f888d36  IMPALA-10397 : Reduce flakiness in test_single_workload
     new 79bee3b  IMPALA-10469: push quickstart to apache repo

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docker/CMakeLists.txt                     | 13 +++--
 docker/README.md                          | 26 +++++++---
 docker/publish_images_to_apache.sh        | 80 +++++++++++++++++++++++++++++++
 docker/quickstart-load-data.yml           |  2 +-
 docker/quickstart.yml                     |  8 ++--
 tests/custom_cluster/test_auto_scaling.py | 25 ++++++----
 6 files changed, 131 insertions(+), 23 deletions(-)
 create mode 100755 docker/publish_images_to_apache.sh

[impala] 02/02: IMPALA-10469: push quickstart to apache repo

Posted by ta...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 79bee3befbc6cdcd358373822a0a3b4d19ab5ce0
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Fri Feb 5 15:35:30 2021 -0800

    IMPALA-10469: push quickstart to apache repo
    
    This adds a script, docker/publish_images_to_apache.sh,
    that allows uploading images to the apache/impala docker hub
    repo, prefixed with a version string. E.g. with the following
    commands:
    
      ninja docker_images quickstart_docker_images
      ./docker/publish_images_to_apache.sh -v 81d5377c2
    
    The uploaded images can then be used for the quickstart cluster,
    as documented in docker/README.
    
    Updated docs for quickstart to use a prefix from apache/impala
    
    Remove IMPALA_QUICKSTART_VERSION, which doesn't interact well with
    the tagging since the image name and version are now encoded in the
    tag.
    
    Fix an incorrect image name added to docker-images.txt:
    impala_profile_tool_image.
    
    Testing:
    Ran Impala quickstart with data loading using instructions in README.
    
      export IMPALA_QUICKSTART_IMAGE_PREFIX="apache/impala:81d5377c2-"
      docker network create -d bridge quickstart-network
      export QUICKSTART_IP=$(docker network inspect quickstart-network -f '{{(index .IPAM.Config 0).Gateway}}')
      export QUICKSTART_LISTEN_ADDR=$QUICKSTART_IP
    
      docker-compose -f docker/quickstart.yml \
          -f docker/quickstart-kudu-minimal.yml \
          -f docker/quickstart-load-data.yml up -d
    
      docker run --network=quickstart-network -it \
           ${IMPALA_QUICKSTART_IMAGE_PREFIX}impala_quickstart_client \
           impala-shell
    
    Change-Id: I535d77e565b73d732ae511d7525193467086c76a
    Reviewed-on: http://gerrit.cloudera.org:8080/17030
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 docker/CMakeLists.txt              | 13 +++++--
 docker/README.md                   | 26 ++++++++++---
 docker/publish_images_to_apache.sh | 80 ++++++++++++++++++++++++++++++++++++++
 docker/quickstart-load-data.yml    |  2 +-
 docker/quickstart.yml              |  8 ++--
 5 files changed, 115 insertions(+), 14 deletions(-)

diff --git a/docker/CMakeLists.txt b/docker/CMakeLists.txt
index 23efee8..9b45a11a 100644
--- a/docker/CMakeLists.txt
+++ b/docker/CMakeLists.txt
@@ -82,6 +82,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
   # instantiated below.
   add_custom_target(docker_images)
   add_custom_target(docker_debug_images)
+  add_custom_target(quickstart_docker_images)
 
   set(exported_image_names "")
 
@@ -129,6 +130,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
   add_daemon_docker_images(admissiond)
 
   # HMS quickstart image, which requires Hive and Hadoop builds.
+  set(QUICKSTART_HMS_IMAGE impala_quickstart_hms)
   set(quickstart_hms_build_dir ${CMAKE_SOURCE_DIR}/docker/quickstart_hms)
   add_custom_target(quickstart_hms_build_setup
     COMMAND rm -f ${quickstart_hms_build_dir}/hive ${quickstart_hms_build_dir}/hadoop
@@ -139,23 +141,28 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
     # Supply the appropriate base image as an argument for the Dockerfile.
     # Use tar with -h flag to assemble a tarball including all the symlinked files and
     # directories in the build context.
-    COMMAND tar cvh . -C ${quickstart_hms_build_dir} . | ${DOCKER_BUILD} --build-arg BASE_IMAGE=${DISTRO_BASE_IMAGE} -t impala_quickstart_hms -
+    COMMAND tar cvh . -C ${quickstart_hms_build_dir} . | ${DOCKER_BUILD} --build-arg BASE_IMAGE=${DISTRO_BASE_IMAGE} -t ${QUICKSTART_HMS_IMAGE} -
     DEPENDS ${quickstart_hms_build_dir}/Dockerfile quickstart_hms_build_setup
     COMMENT "Building quickstart HMS docker image."
     VERBATIM
   )
+  ADD_DEPENDENCIES(quickstart_docker_images quickstart_hms_image)
+  set(exported_image_names "${exported_image_names} ${QUICKSTART_HMS_IMAGE}")
 
   # Client quickstart image, which only requires some scripts.
+  set(QUICKSTART_CLIENT_IMAGE impala_quickstart_client)
   set(quickstart_client_build_dir ${CMAKE_SOURCE_DIR}/docker/quickstart_client)
   add_custom_target(quickstart_client_image
     # Supply the appropriate base image as an argument for the Dockerfile.
     # Use tar with -h flag to assemble a tarball including all the symlinked files and
     # directories in the build context.
-    COMMAND tar cvh . -C ${quickstart_client_build_dir} . | ${DOCKER_BUILD} ${COMMON_DOCKER_BUILD_ARGS} --build-arg BASE_IMAGE=${DISTRO_BASE_IMAGE} -t impala_quickstart_client -
+    COMMAND tar cvh . -C ${quickstart_client_build_dir} . | ${DOCKER_BUILD} ${COMMON_DOCKER_BUILD_ARGS} --build-arg BASE_IMAGE=${DISTRO_BASE_IMAGE} -t ${QUICKSTART_CLIENT_IMAGE} -
     DEPENDS ${quickstart_client_build_dir}/Dockerfile ${quickstart_client_build_dir}/data-load-entrypoint.sh
     COMMENT "Building quickstart client docker image."
     VERBATIM
   )
+  ADD_DEPENDENCIES(quickstart_docker_images quickstart_client_image)
+  set(exported_image_names "${exported_image_names} ${QUICKSTART_CLIENT_IMAGE}")
 
   # Add a target to build utility docker images for 'build_type'. 'build_context_args' are
   # passed to the setup_build_context.py script.
@@ -184,7 +191,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
       COMMENT "Building Impala profile tool docker image build_type=${build_type}."
       VERBATIM
     )
-    set(exported_image_names "${exported_image_names} ${profile_tool_target}" PARENT_SCOPE)
+    set(exported_image_names "${exported_image_names} ${profile_tool_image}" PARENT_SCOPE)
   endfunction()
   add_utility_images(impala_profile_tool_image impala_profile_tool release "")
   add_utility_images(impala_profile_tool_debug_image impala_profile_tool_debug debug "--debug-build")
diff --git a/docker/README.md b/docker/README.md
index e56d8fa..9ea95c8 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -35,11 +35,11 @@ If you want the cluster to be open to connections from other hosts, you can set
 export QUICKSTART_LISTEN_ADDR=0.0.0.0
 ```
 
-You can optionally set `IMPALA_QUICKSTART_IMAGE_PREFIX` to pull prebuilt images from a DockerHub repo,
-for example:
+You can optionally set `IMPALA_QUICKSTART_IMAGE_PREFIX` to pull prebuilt images from a DockerHub repo.
+For example, the following will use images like `apache/impala:81d5377c2-impalad_coordinator`:
 
 ```bash
-  export IMPALA_QUICKSTART_IMAGE_PREFIX="timgarmstrong/"
+  export IMPALA_QUICKSTART_IMAGE_PREFIX="apache/impala:81d5377c2-"
 ```
 
 Leave `IMPALA_QUICKSTART_IMAGE_PREFIX` unset to use images built from a local Impala dev environment.
@@ -143,9 +143,23 @@ The following environment variables influence the behaviour of the various
 quickstart docker compose files.
 * `KUDU_QUICKSTART_VERSION` - defaults to latest, can be overridden to a
   different tag to use different Kudu images.
-* `IMPALA_QUICKSTART_VERSION` - defaults to latest, can be overridden to a
-  different tag to use different Impala images.
 * `IMPALA_QUICKSTART_IMAGE_PREFIX` - defaults to using local images, change to
-  `"timgarmstrong/"` to use my prebuilt images.
+   a different prefix to pick up prebuilt images.
 * `QUICKSTART_LISTEN_ADDR` - can be set to either `$QUICKSTART_IP` to listen on
   only the docker network interface, or `0.0.0.0` to listen on all interfaces.
+
+# Publishing Quickstart Docker Images (for developers)
+To publish the images you need to build locally then run `publish_images_to_apache.sh`
+to tag and push them to a docker repository. For example, to tag the images with the
+current commit hash and upload them to the default `apache/impala` Docker repository,
+you can run the following commands:
+
+```bash
+cd $IMPALA_HOME
+IMAGE_VERSION=$(git rev-parse --short HEAD)
+./buildall.sh -release -noclean -ninja -skiptests -notests
+ninja docker_images quickstart_docker_images
+./docker/publish_images_to_apache.sh -v ${IMAGE_VERSION}
+```
+
+For official Impala releases you will want to use the release version instead.
diff --git a/docker/publish_images_to_apache.sh b/docker/publish_images_to_apache.sh
new file mode 100755
index 0000000..f0a2e64
--- /dev/null
+++ b/docker/publish_images_to_apache.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euo pipefail
+# Print a summary of the supported command-line flags to stdout.
+usage() {
+  echo "publish_images_to_apache.sh -v <version string> [-d] [-r <repo>]"
+  echo "  -d: if specified, upload debug images instead of release images"
+  echo "  -r: docker repository to upload to (defaults to apache/impala)"
+  echo "  -v: version string to tag upload with, e.g. git hash or release version"
+}
+
+SCRIPT_DIR=$(cd $(dirname "$0") && pwd)
+
+# Arguments to use for grep when filtering docker-images.txt
+IMAGE_GREP_FILTER_ARGS="-v _debug"
+
+VERSION=""
+
+TARGET_REPO="apache/impala"
+
+while getopts "r:v:" flag
+do
+    case "${flag}" in
+        r) TARGET_REPO="$OPTARG"
+          ;;
+        v) VERSION="$OPTARG"
+          ;;
+        *)
+          usage
+          exit 1
+    esac
+done
+
+if [[ "$VERSION" = "" ]]; then
+  echo "-v must be provided"
+  usage
+  exit 1
+fi
+
+# Include the published images, filtering out debug/release as needed.
+IMAGES=$(cat "$SCRIPT_DIR/docker-images.txt" | tr ' ' '\n' |\
+    grep $IMAGE_GREP_FILTER_ARGS | sort | uniq | tr '\n' ' ')
+IMAGES+=" impala_quickstart_client impala_quickstart_hms"
+
+# Show what is about to be pushed and ask for confirmation; any reply other than
+# a single "y" or "Y" exits with status 0 without pushing anything.
+echo "Docker images to publish: $IMAGES"
+echo "Version string: '$VERSION'"
+read -p "Continue with upload to $TARGET_REPO [y/N]? "
+# REPLY is the variable that read fills in when no name is given.
+[[ $REPLY =~ ^[Yy]$ ]] || exit 0
+
+for IMAGE in $IMAGES
+do
+  # Prefix the image with the version so that the set of images can be identified
+  # with a prefix, e.g. IMPALA_QUICKSTART_IMAGE_PREFIX in the quickstart docker compose.
+  DST="${TARGET_REPO}:${VERSION}-${IMAGE}"
+  DIGEST=$(docker images --no-trunc --quiet "${IMAGE}")
+  echo "Publishing ${IMAGE} (${DIGEST}) to ${DST}"
+  docker tag $IMAGE "$DST"
+  docker push "$DST"
+done
+
diff --git a/docker/quickstart-load-data.yml b/docker/quickstart-load-data.yml
index d247510..f0e7eec 100644
--- a/docker/quickstart-load-data.yml
+++ b/docker/quickstart-load-data.yml
@@ -20,7 +20,7 @@
 version: "3"
 services:
   data-loader:
-    image: ${IMPALA_QUICKSTART_IMAGE_PREFIX:-}impala_quickstart_client:${IMPALA_QUICKSTART_VERSION:-latest}
+    image: ${IMPALA_QUICKSTART_IMAGE_PREFIX:-}impala_quickstart_client
     depends_on:
       - impalad-1
     command: ["load_tpcds"]
diff --git a/docker/quickstart.yml b/docker/quickstart.yml
index e50056f..5f97934 100644
--- a/docker/quickstart.yml
+++ b/docker/quickstart.yml
@@ -25,7 +25,7 @@
 version: "3"
 services:
   hms:
-    image: ${IMPALA_QUICKSTART_IMAGE_PREFIX:-}impala_quickstart_hms:${IMPALA_QUICKSTART_VERSION:-latest}
+    image: ${IMPALA_QUICKSTART_IMAGE_PREFIX:-}impala_quickstart_hms
     # Give the HMS an explicit hostname to avoid issues with docker-compose-generated
     # hostnames including underscore, which is rejected by Java's URL parser.
     container_name: quickstart-hive-metastore
@@ -40,7 +40,7 @@ services:
     networks:
       - quickstart-network
   statestored:
-    image: ${IMPALA_QUICKSTART_IMAGE_PREFIX:-}statestored:${IMPALA_QUICKSTART_VERSION:-latest}
+    image: ${IMPALA_QUICKSTART_IMAGE_PREFIX:-}statestored
     ports:
       # Web debug UI
       - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR environment variable}:25010:25010"
@@ -53,7 +53,7 @@ services:
     depends_on:
       - statestored
       - hms
-    image: ${IMPALA_QUICKSTART_IMAGE_PREFIX:-}catalogd:${IMPALA_QUICKSTART_VERSION:-latest}
+    image: ${IMPALA_QUICKSTART_IMAGE_PREFIX:-}catalogd
     ports:
       # Web debug UI
       - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR environment variable}:25020:25020"
@@ -67,7 +67,7 @@ services:
     networks:
       - quickstart-network
   impalad-1:
-    image: ${IMPALA_QUICKSTART_IMAGE_PREFIX:-}impalad_coord_exec:${IMPALA_QUICKSTART_VERSION:-latest}
+    image: ${IMPALA_QUICKSTART_IMAGE_PREFIX:-}impalad_coord_exec
     depends_on:
       - statestored
       - catalogd

[impala] 01/02: IMPALA-10397 : Reduce flakiness in test_single_workload

Posted by ta...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f888d362951454b273114c98686193498c0d3fe0
Author: Bikramjeet Vig <bi...@gmail.com>
AuthorDate: Fri Feb 5 13:40:07 2021 -0800

    IMPALA-10397 : Reduce flakiness in test_single_workload
    
    This test failed recently due to a timeout waiting for executors to
    come up. The logs showed that the executors came up on time but it
    was not recognized by the coordinator. This patch attempts to reduce
    flakiness by increasing the timeout and adding more logging in case
    this happens in the future.
    
    Testing:
    Ran in a loop on my local for a few hours.
    
    Change-Id: I73ea5eb663db6d03832b19ed323670590946f514
    Reviewed-on: http://gerrit.cloudera.org:8080/17028
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/custom_cluster/test_auto_scaling.py | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/tests/custom_cluster/test_auto_scaling.py b/tests/custom_cluster/test_auto_scaling.py
index 6f999b0..bbb7dd0 100644
--- a/tests/custom_cluster/test_auto_scaling.py
+++ b/tests/custom_cluster/test_auto_scaling.py
@@ -27,7 +27,7 @@ from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.skip import SkipIfEC
 
 LOG = logging.getLogger("test_auto_scaling")
-
+TOTAL_BACKENDS_METRIC_NAME = "cluster-membership.backends.total"
 
 class TestAutoScaling(CustomClusterTestSuite):
   @classmethod
@@ -43,7 +43,7 @@ class TestAutoScaling(CustomClusterTestSuite):
   """This class contains tests that exercise the logic related to scaling clusters up and
   down by adding and removing groups of executors."""
   INITIAL_STARTUP_TIME_S = 10
-  STATE_CHANGE_TIMEOUT_S = 45
+  STATE_CHANGE_TIMEOUT_S = 60
   # This query will scan two partitions (month = 1, 2) and thus will have 1 fragment
   # instance per executor on groups of size 2. Each partition has 2 rows, so it performs
   # two comparisons and should take around 1 second to complete.
@@ -51,13 +51,20 @@ class TestAutoScaling(CustomClusterTestSuite):
              and id + random() < sleep(500)"""
 
   def _get_total_admitted_queries(self):
-    return self.impalad_test_service.get_total_admitted_queries("default-pool")
+    admitted_queries = self.impalad_test_service.get_total_admitted_queries(
+      "default-pool")
+    LOG.info("Current total admitted queries: %s", admitted_queries)
+    return admitted_queries
 
   def _get_num_backends(self):
-    return self.impalad_test_service.get_metric_value("cluster-membership.backends.total")
+    metric_val = self.impalad_test_service.get_metric_value(TOTAL_BACKENDS_METRIC_NAME)
+    LOG.info("Getting metric %s : %s", TOTAL_BACKENDS_METRIC_NAME, metric_val)
+    return metric_val
 
   def _get_num_running_queries(self):
-    return self.impalad_test_service.get_num_running_queries("default-pool")
+    running_queries = self.impalad_test_service.get_num_running_queries("default-pool")
+    LOG.info("Current running queries: %s", running_queries)
+    return running_queries
 
   @SkipIfEC.fix_later
   def test_single_workload(self):
@@ -124,7 +131,7 @@ class TestAutoScaling(CustomClusterTestSuite):
 
       # Wait for workers to spin down
       self.impalad_test_service.wait_for_metric_value(
-        "cluster-membership.backends.total", 1,
+        TOTAL_BACKENDS_METRIC_NAME, 1,
         timeout=self.STATE_CHANGE_TIMEOUT_S, interval=1)
       assert self.impalad_test_service.get_metric_value(
         "cluster-membership.executor-groups.total") == 0
@@ -155,7 +162,7 @@ class TestAutoScaling(CustomClusterTestSuite):
       # Wait for workers to spin up
       cluster_size = GROUP_SIZE + 1  # +1 to include coordinator.
       self.impalad_test_service.wait_for_metric_value(
-        "cluster-membership.backends.total", cluster_size,
+        TOTAL_BACKENDS_METRIC_NAME, cluster_size,
         timeout=self.STATE_CHANGE_TIMEOUT_S, interval=1)
 
       # Wait until we admitted at least 10 queries
@@ -184,7 +191,7 @@ class TestAutoScaling(CustomClusterTestSuite):
 
       # Wait for workers to spin down
       self.impalad_test_service.wait_for_metric_value(
-        "cluster-membership.backends.total", 1,
+        TOTAL_BACKENDS_METRIC_NAME, 1,
         timeout=self.STATE_CHANGE_TIMEOUT_S, interval=1)
       assert self.impalad_test_service.get_metric_value(
         "cluster-membership.executor-groups.total") == 0
@@ -242,7 +249,7 @@ class TestAutoScaling(CustomClusterTestSuite):
 
       # Wait for workers to spin down
       self.impalad_test_service.wait_for_metric_value(
-        "cluster-membership.backends.total", 1,
+        TOTAL_BACKENDS_METRIC_NAME, 1,
         timeout=self.STATE_CHANGE_TIMEOUT_S, interval=1)
       assert self.impalad_test_service.get_metric_value(
         "cluster-membership.executor-groups.total") == 0