You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2021/09/13 13:46:51 UTC

[impala] branch master updated: IMPALA-10904: Add some perf tools to the repository

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 6d47927  IMPALA-10904: Add some perf tools to the repository
6d47927 is described below

commit 6d47927668e6e9accc3efd0d5bcd26ee9926cb39
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Wed Sep 8 11:12:40 2021 +0200

    IMPALA-10904: Add some perf tools to the repository
    
    This patch adds two perf tools to the repository. Both can be used to
    generate flame graphs (https://www.brendangregg.com/flamegraphs.html).
    
    perf-record.sh:
    It samples the CPU stack traces for the entire system, or for a
    specific PID until the user hits Ctrl+C. It can be useful if the
    developer wants to take a look at what Impala is doing.
    The resulting flame graph is written to an SVG file.
    
    perf-query.sh:
    It takes a query string as a parameter and passes it to the impala
    shell to execute. While the query is executing the script samples
    the CPU stack traces for the entire system. The resulting flame
    graph is written to an SVG file.
    E.g.:
    perf-query.sh "select count(*) from tpch.lineitem group by l_returnflag"
    
    Change-Id: Ib3da696b939204d23c5285dcf1bf6ee3a3738415
    Reviewed-on: http://gerrit.cloudera.org:8080/17834
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/perf_tools/perf-query.sh  | 88 +++++++++++++++++++++++++++++++++++++++++++
 bin/perf_tools/perf-record.sh | 84 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 172 insertions(+)

diff --git a/bin/perf_tools/perf-query.sh b/bin/perf_tools/perf-query.sh
new file mode 100755
index 0000000..8e44491
--- /dev/null
+++ b/bin/perf_tools/perf-query.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script measures the performance of a query, or a sequence of queries.
+# It visualizes the stack traces on a flame graph
+# (https://www.brendangregg.com/flamegraphs.html).
+#
+# It uses 'perf' and measures all processes in the system while the query
+# is executing, not just Impala.
+#
+# PREREQUISITES:
+# * In directory ${IMPALA_TOOLCHAIN}: git clone https://github.com/brendangregg/FlameGraph
+# * Install 'perf' on your system (e.g. 'apt install linux-tools-generic')
+#
+# Usage:
+# perf-query.sh "<query>"
+# E.g.:
+# perf-query.sh "select count(*) from tpch.lineitem group by l_returnflag"
+# perf-query.sh "set MT_DOP=10; select count(*) from tpch.lineitem group by l_returnflag"
+#
+# Possible problems:
+# Sometimes 'perf record -g' is not able to correctly identify all the stack frames, hence
+# we cannot collapse everything together. Instead of '-g' we can use '--call-graph dwarf'
+# which does a better job, but later 'perf script' takes much longer time for
+# postprocessing the data. Interestingly 'perf-record.sh' doesn't seem to have this
+# problem.
+
+# The FlameGraph scripts are expected in a checkout under ${IMPALA_TOOLCHAIN}.
+FLAME_GRAPH_DIR="${IMPALA_TOOLCHAIN}/FlameGraph"
+if [[ ! -d "${FLAME_GRAPH_DIR}" ]]; then
+  echo "ERROR: ${FLAME_GRAPH_DIR} does not exist." >&2
+  echo "Please use 'git clone https://github.com/brendangregg/FlameGraph' in ${IMPALA_TOOLCHAIN}" >&2
+  exit 1
+fi
+# Make stackcollapse-perf.pl and flamegraph.pl callable by name further below.
+PATH="${FLAME_GRAPH_DIR}:${PATH}"
+
+if [[ $# -eq 0 ]]; then
+  echo "Please provide a query as an argument." >&2
+  exit 1
+fi
+
+
+# We invoke 'sudo perf' in the background so let's do a blocking sudo now. Without
+# working sudo the background 'perf record' could not start, so give up early.
+sudo echo "test sudo" || { echo "ERROR: sudo is required to run perf." >&2; exit 1; }
+# Sample CPU stack traces (-g: via frame pointers) for the entire system, at 99 Hertz.
+sudo perf record -F 99 -g -a &
+perf_pid=$!
+
+# Use the checkout pointed to by IMPALA_HOME; fall back to the historical ~/Impala.
+"${IMPALA_HOME:-${HOME}/Impala}/bin/impala-shell.sh" -q "$1"
+# Send interrupt to 'perf record'. We need to issue 'kill' in a new session/process
+# group via 'setsid', otherwise 'perf record' won't get the signal (because it's
+# running with sudo).
+sudo setsid kill -s INT "${perf_pid}"
+wait "${perf_pid}"
+
+if [[ ! -f perf.data ]]; then
+  echo "ERROR: 'perf.data' has not been generated." >&2
+  exit 1
+fi
+
+# Create flame graph. With pipefail a failure of any pipeline stage is detected, so we
+# never announce an SVG that was not actually produced.
+set -o pipefail
+sudo perf script | stackcollapse-perf.pl > out.perf-folded || exit 1
+flamegraph.pl out.perf-folded > perf_query.svg || exit 1
+echo "Flame graph has been written to 'perf_query.svg'"
+# Open firefox to render the flame graph
+firefox perf_query.svg 2>/dev/null
+
diff --git a/bin/perf_tools/perf-record.sh b/bin/perf_tools/perf-record.sh
new file mode 100755
index 0000000..37217dc
--- /dev/null
+++ b/bin/perf_tools/perf-record.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script samples the CPU stack traces for the entire system, or
+# for a specific PID until it gets an interrupt signal (Ctrl+C).
+# It visualizes the stack traces on a flame graph:
+# https://www.brendangregg.com/flamegraphs.html
+#
+# It can be useful if we want to figure out what Impala is doing currently.
+#
+# PREREQUISITES:
+# * In directory ${IMPALA_TOOLCHAIN}: git clone https://github.com/brendangregg/FlameGraph
+# * Install 'perf' on your system (e.g. 'apt install linux-tools-generic')
+#
+# Usage:
+# perf-record.sh # records events for the whole system until Ctrl+C is pressed
+# or
+# perf-record.sh <pid> # records perf events for a specific process until Ctrl+C is pressed
+
+
+# The FlameGraph scripts are expected in a checkout under ${IMPALA_TOOLCHAIN}.
+FLAME_GRAPH_DIR="${IMPALA_TOOLCHAIN}/FlameGraph"
+if [[ ! -d "${FLAME_GRAPH_DIR}" ]]; then
+  echo "ERROR: ${FLAME_GRAPH_DIR} does not exist." >&2
+  echo "Please use 'git clone https://github.com/brendangregg/FlameGraph' in ${IMPALA_TOOLCHAIN}" >&2
+  exit 1
+fi
+# Make stackcollapse-perf.pl and flamegraph.pl callable by name further below.
+PATH="${FLAME_GRAPH_DIR}:${PATH}"
+
+# We trap the interrupt signal, so only 'perf record' gets interrupted while this script
+# continues, so it can create the flame graph.
+trap ctrl_c INT
+function ctrl_c() {
+  echo "Trapped Ctrl+C"
+}
+
+# Measure whole system if no args given. Otherwise we expect a pid as argument.
+# The perf options are kept in an array so the pid is always passed as a single word.
+if [[ $# -eq 0 ]]; then
+  echo "This script is going to record perf events for the whole system."
+  PERF_ARGS=(-a)
+elif ps -p "$1" >/dev/null 2>&1; then
+  echo "This script is going to record perf events for pid=$1"
+  PERF_ARGS=(-p "$1")
+else
+  echo "ERROR: Process with pid=$1 does not exist." >&2
+  exit 1
+fi
+
+echo "perf record started... Hit Ctrl+C to stop sampling."
+# Sample CPU stack traces at 99 Hertz. This runs in the foreground; the script only
+# continues once 'perf record' has ended.
+sudo perf record -F 99 -g "${PERF_ARGS[@]}"
+
+if [[ ! -f perf.data ]]; then
+  echo "ERROR: 'perf.data' has not been generated." >&2
+  exit 1
+fi
+
+# Create the flame graph. With pipefail a failure of any pipeline stage is detected,
+# so we never announce an SVG that was not actually produced.
+set -o pipefail
+sudo perf script | stackcollapse-perf.pl > out.perf-folded || exit 1
+flamegraph.pl out.perf-folded > perf_record.svg || exit 1
+echo "Flame graph has been written to 'perf_record.svg'."
+# Render the flame graph in browser
+firefox perf_record.svg 2>/dev/null