You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by la...@apache.org on 2020/08/15 14:59:36 UTC
[kudu] 08/23: [script] Add script tools
This is an automated email from the ASF dual-hosted git repository.
laiyingchun pushed a commit to tag kudu-1.12.0-mdh1.0.0-4c2c075-centos-release
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit e09b284b8fc41701ef017e3aacdfab5543332fe1
Author: laiyingchun <la...@xiaomi.com>
AuthorDate: Wed Jan 9 19:27:16 2019 +0800
[script] Add script tools
---
kudu | 1 +
src/kudu/scripts/batch_operate_on_tables.sh | 68 ++++
src/kudu/scripts/build_env.sh | 24 ++
src/kudu/scripts/build_kudu.sh | 162 ++++++++
src/kudu/scripts/cal_bill_daily.py | 280 +++++++++++++
src/kudu/scripts/falcon_screen.json | 603 ++++++++++++++++++++++++++++
src/kudu/scripts/falcon_screen.py | 603 ++++++++++++++++++++++++++++
src/kudu/scripts/kudu_falcon_screen.sh | 119 ++++++
src/kudu/scripts/kudu_utils.py | 106 +++++
src/kudu/scripts/kudurc | 69 ++++
src/kudu/scripts/minos_control_server.py | 225 +++++++++++
11 files changed, 2260 insertions(+)
diff --git a/kudu b/kudu
new file mode 120000
index 0000000..f2638cd
--- /dev/null
+++ b/kudu
@@ -0,0 +1 @@
+build/release/bin/kudu
\ No newline at end of file
diff --git a/src/kudu/scripts/batch_operate_on_tables.sh b/src/kudu/scripts/batch_operate_on_tables.sh
new file mode 100755
index 0000000..09a0e3e
--- /dev/null
+++ b/src/kudu/scripts/batch_operate_on_tables.sh
@@ -0,0 +1,68 @@
#!/bin/bash
#
# Batch-run a kudu CLI table operation ('copy', 'delete', 'describe' or
# 'scan') over a list of tables in one cluster. Requires KUDU_HOME to point
# at a directory containing the 'kudu' CLI binary.

if [ $# -lt 3 ]
then
  echo "This tool is for batch operation on batch of tables in a cluster"
  echo "USAGE: $0 file operate cluster [dst-cluster]"
  echo " file: A file contains several table names in a cluster, one table name per line."
  echo " Or 'auto' means all tables in this cluster"
  echo " operate: Now support 'copy', 'delete', 'describe' and 'scan'"
  echo " cluster: Cluster name or master RPC addresses"
  echo " dst-cluster: Master addresses of destination cluster, needed only when 'operate' is 'copy'"
  # BUG FIX: was 'exit -1', which is non-portable (wraps to 255); use 1.
  exit 1
fi

FILE=$1
OPERATE=$2
CLUSTER=$3
DST_CLUSTER=$4
#FLAGS="-show_attributes"
#FLAGS="-create_table=false -write_type=upsert"
BIN_PATH=${KUDU_HOME}/kudu
PID=$$

# Echo back all parameters so the operator can sanity-check them.
echo "UID: ${UID}"
echo "PID: ${PID}"
echo "tables:"
if [ "${FILE}" == "auto" ]
then
  echo "All tables in the cluster"
else
  cat "${FILE}"
fi
echo "operate: ${OPERATE}"
echo "cluster: ${CLUSTER}"
echo "dst cluster: ${DST_CLUSTER}"
echo "flags: ${FLAGS}"

echo ""
echo "All params above have been checked? (yes)"
read -r INPUT
if [ "${INPUT}" != "yes" ]
then
  # BUG FIX: was 'exit $?', which exited with the status of the preceding
  # successful test, i.e. 0 — the abort path reported success.
  exit 1
fi

# The kudu CLI expects the cluster argument prefixed with '@'.
if [ -n "${DST_CLUSTER}" ]
then
  DST_CLUSTER=@${DST_CLUSTER}
fi

if [ "${FILE}" == "auto" ]
then
  TABLE_LIST=/tmp/${UID}.${PID}.table.list
  "${BIN_PATH}" table list "@${CLUSTER}" | sort -n >"${TABLE_LIST}"
else
  TABLE_LIST=${FILE}
fi

if [ ! -f "${TABLE_LIST}" ]
then
  # BUG FIX: was 'exit $?' after a successful echo, i.e. exit 0 on error.
  echo "file ${TABLE_LIST} does not exist!"
  exit 1
fi

while read -r TABLE
do
  # DST_CLUSTER and FLAGS are intentionally unquoted: they may be empty or
  # carry multiple words that must split into separate CLI arguments.
  "${BIN_PATH}" table "${OPERATE}" "@${CLUSTER}" "${TABLE}" ${DST_CLUSTER} ${FLAGS}
done < "${TABLE_LIST}"
diff --git a/src/kudu/scripts/build_env.sh b/src/kudu/scripts/build_env.sh
new file mode 100755
index 0000000..dde2d19
--- /dev/null
+++ b/src/kudu/scripts/build_env.sh
@@ -0,0 +1,24 @@
#!/bin/bash
#
# Install the packages needed to build Kudu on CentOS or Ubuntu.

OS=$(lsb_release -d | awk '{print $2}')
echo "Setup build env for kudu on $OS"

if [[ "$OS" == "CentOS" ]]; then
  sudo yum -y install autoconf automake cyrus-sasl-devel cyrus-sasl-gssapi \
    cyrus-sasl-plain flex gcc gcc-c++ gdb git java-1.8.0-openjdk-devel \
    krb5-server krb5-workstation libtool make openssl-devel patch pkgconfig \
    redhat-lsb-core rsync unzip vim-common which
  # devtoolset-3 provides a modern enough gcc toolchain on EL6.
  DTLS_RPM=rhscl-devtoolset-3-epel-6-x86_64-1-2.noarch.rpm
  DTLS_RPM_URL=https://www.softwarecollections.org/repos/rhscl/devtoolset-3/epel-6-x86_64/noarch/${DTLS_RPM}
  wget "${DTLS_RPM_URL}" -O "${DTLS_RPM}"
  sudo yum install -y scl-utils "${DTLS_RPM}"
  sudo yum install -y devtoolset-3-toolchain
elif [[ "$OS" == "Ubuntu" ]]; then
  sudo apt-get -y install autoconf automake curl flex g++ gcc gdb git \
    krb5-admin-server krb5-kdc krb5-user libkrb5-dev libsasl2-dev libsasl2-modules \
    libsasl2-modules-gssapi-mit libssl-dev libtool lsb-release make ntp \
    openjdk-8-jdk openssl patch pkg-config python rsync unzip vim-common
else
  echo "Unsupported OS: $OS in $0" >&2
  # BUG FIX: was a bare 'exit', which returned the status of the previous
  # command (0) — the failure path reported success.
  exit 1
fi
diff --git a/src/kudu/scripts/build_kudu.sh b/src/kudu/scripts/build_kudu.sh
new file mode 100755
index 0000000..1016697
--- /dev/null
+++ b/src/kudu/scripts/build_kudu.sh
@@ -0,0 +1,162 @@
+#!/bin/bash
+
+BASE_DIR="$( cd "$( dirname "$0" )" && cd ../../.. && pwd )"
+
# Print the supported command-line options, then exit successfully.
function usage()
{
    printf '%s\n' \
        "Options:" \
        "  -h" \
        "  -g|--custom-gcc"
    exit 0
}
+
#USAGE: copy_file [cp-options] src [src...] dest
# Copy files via 'cp -v', aborting the whole script on failure.
# Arguments are forwarded quoted ("$@") so paths containing spaces survive;
# leading cp options such as -r are still passed straight through.
function copy_file() {
    if [[ $# -lt 2 ]]; then
        echo "ERROR: invalid copy file command: cp $*"
        exit 1
    fi
    # BUG FIX: was 'cp -v $*', which word-splits and glob-expands every path.
    if ! cp -v "$@"; then
        echo "ERROR: copy file failed: cp $*"
        exit 1
    fi
}
+
# Print the path of the libstdc++ the freshly built kudu binary links against.
# $1 == "true" selects the lib64 next to the custom gcc in PATH; otherwise the
# first ldconfig candidate whose bitness matches the host (via check_bit) wins.
function get_stdcpp_lib()
{
    local libname
    libname=$(ldd "${BASE_DIR}/build/latest/bin/kudu" 2>/dev/null | grep libstdc++)
    libname=$(echo "$libname" | cut -f1 -d" ")
    # BUG FIX: the comparison was '[ $1 = "true" ]' — with an empty/missing
    # argument the unquoted expansion makes the test a syntax error.
    if [ "$1" = "true" ]; then
        local gcc_path
        gcc_path=$(which gcc)
        echo "$(dirname "$gcc_path")/../lib64/$libname"
    else
        local libs
        libs=($(ldconfig -p | grep "$libname" | awk '{print $NF}'))
        local lib
        for lib in "${libs[@]}"; do
            if [ "$(check_bit "$lib")" = "true" ]; then
                echo "$lib"
                return
            fi
        done
    fi
}
+
# Echo "true" when the library at $1 matches the host bitness reported by
# 'getconf LONG_BIT' (per the 'file' utility); echo nothing otherwise.
# Symlinks are resolved to their real target before inspection.
function check_bit()
{
    local lib=$1
    local bit_mode
    bit_mode=$(getconf LONG_BIT)
    local target=$lib
    if [ -L "$lib" ]; then
        # BUG FIX: the target used to be rebuilt by parsing 'ls -l' output and
        # prefixing the link's directory, which produced a bogus path when the
        # link target was absolute; readlink -f canonicalizes correctly.
        target=$(readlink -f "$lib")
    fi
    if file "$target" 2>/dev/null | grep -q "${bit_mode}-bit"; then
        echo "true"
    fi
}
+
custom_gcc="false"
# Parse command-line options; unrecognized options are silently ignored,
# matching the original behavior.
# BUG FIX: the loop condition was '[[ $# > 0 ]]', which performs a
# lexicographic string comparison inside [[ ]], not a numeric one; use
# arithmetic evaluation instead.
while (( $# > 0 )); do
    option_key="$1"
    case $option_key in
        -g|--custom-gcc)
            custom_gcc="true"
            ;;
        -h|--help)
            usage
            ;;
    esac
    shift
done
+
KUDU_VERSION=`cat ${BASE_DIR}/version.txt`
OS=`lsb_release -d | awk '{print $2}'`
echo "Start to build kudu $KUDU_VERSION on $OS"

# CentOS needs devtoolset enabled before building thirdparty; Ubuntu does not.
if [[ "$OS" == "CentOS" ]]; then
    ${BASE_DIR}/build-support/enable_devtoolset.sh
    ${BASE_DIR}/thirdparty/build-if-necessary.sh
elif [[ "$OS" == "Ubuntu" ]]; then
    ${BASE_DIR}/thirdparty/build-if-necessary.sh
else
    echo "ERROR: unsupported OS: $OS in $0"
    exit 1
fi

# Fresh out-of-tree release build under build/release.
rm -rf ${BASE_DIR}/build/release
mkdir -p ${BASE_DIR}/build/release
cd ${BASE_DIR}/build/release
../../thirdparty/installed/common/bin/cmake -DCMAKE_BUILD_TYPE=release ../..
# Parallel build: one make job per CPU listed in /proc/cpuinfo.
make -j `cat /proc/cpuinfo | egrep "^processor\s:" | wc -l`
if [[ $? -ne 0 ]]; then
    echo "ERROR: build Kudu failed"
    exit 1
fi
echo "Build Kudu succeed"

# version_defines.h is generated by the build and carries version/commit info.
VERSION_DEFINES=${BASE_DIR}/build/release/src/kudu/generated/version_defines.h
if [[ ! -f ${VERSION_DEFINES} ]]; then
    echo "ERROR: $VERSION_DEFINES not found"
    exit 1
fi

# Refuse to package a build made from a dirty working tree.
CLEAN_REPO=`grep "^#define KUDU_BUILD_CLEAN_REPO " ${VERSION_DEFINES} | awk '{print $NF}' | tr 'A-Z' 'a-z'`
if [[ "$CLEAN_REPO"x != "true"x ]]; then
    echo "ERROR: repository is not clean"
    exit 1
fi

# Package name: kudu-<version>-<7-char commit>-<os>-<build type>, lower-cased.
VERSION=`grep "^#define KUDU_VERSION_STRING " ${VERSION_DEFINES} | cut -d "\"" -f 2`
COMMIT_ID=`grep "^#define KUDU_GIT_HASH " ${VERSION_DEFINES} | cut -d "\"" -f 2`
BUILD_TYPE=`grep "^#define KUDU_BUILD_TYPE " ${VERSION_DEFINES} | cut -d "\"" -f 2`
PACK_VERSION=`echo ${VERSION}-${COMMIT_ID:0:7}-${OS}-${BUILD_TYPE} | tr 'A-Z' 'a-z'`
PACK_NAME=kudu-${PACK_VERSION}

echo "Starting package $PACK_NAME"
PACK_DIR=${BASE_DIR}/build/${PACK_NAME}
PACKAGE=${PACK_NAME}.tar.gz
rm -rf ${PACK_DIR} ${BASE_DIR}/build/${PACKAGE}
mkdir -p ${PACK_DIR}
echo "Coping files to $PACK_DIR"
# Binaries are renamed to their deployment names (kudu_master, kudu_tablet_server, ...).
copy_file ${BASE_DIR}/build/latest/bin/kudu-collector ${PACK_DIR}/kudu_collector
copy_file ${BASE_DIR}/build/latest/bin/kudu-master ${PACK_DIR}/kudu_master
copy_file ${BASE_DIR}/build/latest/bin/kudu-tserver ${PACK_DIR}/kudu_tablet_server
copy_file ${BASE_DIR}/build/latest/bin/kudu ${PACK_DIR}/
# Ship the matching libstdc++ alongside the binaries.
copy_file `get_stdcpp_lib $custom_gcc` ${PACK_DIR}/
copy_file ${BASE_DIR}/src/kudu/scripts/batch_operate_on_tables.sh ${PACK_DIR}/
copy_file ${BASE_DIR}/src/kudu/scripts/falcon_screen.json ${PACK_DIR}/
copy_file ${BASE_DIR}/src/kudu/scripts/falcon_screen.py ${PACK_DIR}/
copy_file ${BASE_DIR}/src/kudu/scripts/kudu_falcon_screen.sh ${PACK_DIR}/
copy_file ${BASE_DIR}/src/kudu/scripts/minos_control_server.py ${PACK_DIR}/
copy_file ${BASE_DIR}/src/kudu/scripts/cal_bill_daily.py ${PACK_DIR}/
copy_file ${BASE_DIR}/src/kudu/scripts/kudu_utils.py ${PACK_DIR}/
copy_file ${BASE_DIR}/src/kudu/scripts/start_local_kudu.sh ${PACK_DIR}/
copy_file ${BASE_DIR}/src/kudu/scripts/kudurc ${PACK_DIR}/
copy_file -r ${BASE_DIR}/www ${PACK_DIR}/
cd ${BASE_DIR}/build
tar -czf ${PACKAGE} ${PACK_NAME}
echo "Packaged $PACKAGE succeed"

# Update the Minos deployment template, if a Minos config is present.
PACK_TEMPLATE=""
if [[ -n "$MINOS_CONFIG_FILE" ]]; then
    PACK_TEMPLATE=`dirname $MINOS_CONFIG_FILE`/xiaomi-config/package/kudu.yaml
fi

if [[ -f ${PACK_TEMPLATE} ]]; then
    echo "Modifying $PACK_TEMPLATE ..."
    # Rewrite the version/build/source lines in place to point at this build.
    sed -i "/^version:/c version: \"$PACK_VERSION\"" ${PACK_TEMPLATE}
    sed -i "/^build:/c build: \"\.\/run.sh pack\"" ${PACK_TEMPLATE}
    sed -i "/^source:/c source: \"$BASE_DIR/build\"" ${PACK_TEMPLATE}
else
    echo "ERROR: modify kudu.yaml failed"
    exit 1
fi

echo "Done"
diff --git a/src/kudu/scripts/cal_bill_daily.py b/src/kudu/scripts/cal_bill_daily.py
new file mode 100755
index 0000000..a92b72b
--- /dev/null
+++ b/src/kudu/scripts/cal_bill_daily.py
@@ -0,0 +1,280 @@
#! /usr/bin/env python
# coding=utf-8
# Daily billing statistics for Kudu clusters: collects per-table disk usage,
# aggregates it per organization, writes daily usage files, and pushes them
# to a git repository.
#
# NOTE(review): this is Python 2 code ('commands', 'iteritems', 'xrange').

import commands
import datetime
from git import Repo
import heapq
import logging
from logging.handlers import RotatingFileHandler
import json
import os
import re
import sys
import time
import kudu_utils
import yaml


# Databases excluded from billing (system/test databases).
g_ignore_db_set = ('system', 'lcsbinlog', 'default', 'zhangxu_test_kudu')
g_month_path, g_month_data_path = kudu_utils.prepare_pricing_month_path()
# Cluster configuration loaded from the 'kudurc' YAML next to this script.
g_clusters_info_dict = yaml.load(open(kudu_utils.g_script_path + '/kudurc', 'r').read(), Loader=yaml.FullLoader)
g_clusters_info = g_clusters_info_dict['clusters_info']
# Files produced in this run; committed by push_file_to_repo() at the end.
g_commit_filenames = list()
# Must be set to a checkout containing '.git' before running; checked in main().
g_git_repo_dir = ''
+
+
def printtsr(level, table, size, reason):
    """Log one (table, size, reason) record at the given logging level.

    `size` is in bytes and is rendered in GiB in the message.
    """
    size_gb = size / (1 << 30)
    kudu_utils.LOG.log(level, 'table: ' + table + (', size: %fG' % size_gb + ', reason: ') + reason)
+
+
class TopKHeap(object):
    """Fixed-capacity min-heap keeping the k largest items by size.

    push() takes dicts of the form {'size': ..., 'table': ...}; top_k()
    drains the heap and returns {table: size} ordered largest-first.
    """

    def __init__(self, k):
        # Maximum number of entries retained.
        self.k = k
        # BUG FIX: the heap used to store the dicts themselves, which relies
        # on Python 2's arbitrary dict ordering and raises TypeError under
        # Python 3 whenever heapq compares two entries. Store orderable
        # (size, table) tuples instead; ties fall back to the table name.
        self.data = []

    def push(self, elem):
        """Insert one {'size', 'table'} record, evicting the smallest if full."""
        item = (elem['size'], elem['table'])
        if len(self.data) < self.k:
            heapq.heappush(self.data, item)
        else:
            smallest = self.data[0]
            if item[0] > smallest[0]:
                heapq.heapreplace(self.data, item)

    def top_k(self):
        """Drain the heap; return {table: size}, inserted largest-first.

        Note: destructive, like the original — the heap is empty afterwards.
        """
        # range() replaces Py2-only xrange(); len() is evaluated once, so the
        # comprehension pops every element in ascending order.
        items = [heapq.heappop(self.data) for _ in range(len(self.data))]
        return {table: size for size, table in reversed(items)}
+
+
def add_org_size(dbtable, org, size, org_size_desc):
    """Accumulate `size` for `org` in `org_size_desc`, tracking top tables.

    Returns False (after logging a warning) when the org name is empty,
    True otherwise.
    """
    if not org:
        printtsr(logging.WARNING, dbtable, size, 'Org name is empty')
        return False

    # Lazily create the per-org accumulator on first sight.
    if org not in org_size_desc:
        org_size_desc[org] = {'size': 0, 'desc': TopKHeap(10)}
    entry = org_size_desc[org]
    entry['size'] += size
    entry['desc'].push({'size': size, 'table': dbtable})
    return True
+
+
def get_org_size_desc_from_olap(cluster_name, dbtable_size_dict, known_db_org_dict):
    """Group per-table sizes by organization.

    The db -> org mapping is read from the cluster's system table
    'system.kudu_table_owners' (via the kudu CLI), with `known_db_org_dict`
    (from the config file) taking precedence. Tables lacking a db prefix,
    in ignored databases, or without a resolvable org are skipped and their
    sizes summed into a logged "Total ignored size".
    Returns {org: {'size': total_bytes, 'desc': TopKHeap-of-top-tables}}.
    """
    db_org_dict = {}
    meta_table = 'system.kudu_table_owners'
    # Scan the ownership table; grep keeps only the value rows.
    cmd = '%s/kudu table scan @%s %s -show_values=true' \
        ' -columns=name,db,org 2>&1 | grep "(string name=\\\""'\
        % (kudu_utils.script_path(), cluster_name, meta_table)
    status, output = commands.getstatusoutput(cmd)
    if status != 0:
        kudu_utils.LOG.error('Scan table %s error, command %s, status %d, output \n%s' % (meta_table, cmd, status, output))
    else:
        for line in output.splitlines():
            # Each row looks like: ... string name="...", string db="...", string org="..." ...
            match_obj = re.search(r'string name="(.*)", string db="(.*)", string org="(.*)"', line, re.M | re.I)
            if match_obj:
                db = match_obj.group(2)
                org = match_obj.group(3)
                db_org_dict[db] = org
            else:
                kudu_utils.LOG.error('Table %s value format error, line\n%s' % (meta_table, line))

    total_ignored_size = 0.0
    org_size_desc = {}
    for dbtable, size in dbtable_size_dict.iteritems():
        db_table_list = dbtable.split('.')
        if len(db_table_list) != 2:
            # Table name has no 'db.' prefix; cannot be attributed to an org.
            total_ignored_size += size
            printtsr(logging.WARNING, dbtable, size, 'Lack db')
            continue

        db, table = db_table_list[0], db_table_list[1]
        if db in g_ignore_db_set:
            total_ignored_size += size
            printtsr(logging.INFO, dbtable, size, 'Ignored table')
            continue

        if db in known_db_org_dict.keys():
            # 'org' from config file
            org = known_db_org_dict[db]
        elif db in db_org_dict.keys():
            # 'org' from system table
            org = db_org_dict[db]
        else:
            total_ignored_size += size
            printtsr(logging.WARNING, db, size, 'Lack org ID')
            continue

        if not add_org_size(dbtable, org, size, org_size_desc):
            total_ignored_size += size
            continue

    # Summary of everything skipped above (indentation reconstructed from the
    # archived patch; placed after the loop as a once-per-call summary).
    printtsr(logging.WARNING, 'TOTAL', total_ignored_size, 'Total ignored size')
    return org_size_desc
+
+
def get_cluster_stat_filename(date, cluster_name):
    """Path of the per-cluster raw usage file for the given date."""
    return '{}{}_{}'.format(g_month_data_path, date, cluster_name)
+
+
def get_service_usage_filename(date):
    """Path of the aggregated service-wide usage file for the given date."""
    return '{}{}_kudu_total'.format(g_month_data_path, date)
+
+
def collect_origin_usage_for_cluster(cluster_name, cluster_info):
    """Collect per-org usage for one cluster and dump it to a JSON file.

    Runs the kudu_collector binary to obtain per-table on-disk sizes,
    attributes them to orgs via get_org_size_desc_from_olap(), and writes
    one JSON record per org to the dated per-cluster stat file. The file
    name is queued in g_commit_filenames for the final git push.
    """
    kudu_utils.LOG.info('Start to collect usage info for cluster %s' % cluster_name)
    # Output: db.table size
    cmd = '%s/kudu_collector -collector_master_addrs=%s ' \
        '-collector_report_method=local -collector_metrics=on_disk_size -log_dir=./log | ' \
        'egrep "^table on_disk_size " | sort | awk \'{print $3, $4}\'' \
        % (kudu_utils.g_script_path, cluster_info['master_addresses'])
    status, output = commands.getstatusoutput(cmd)
    if status != 0:
        kudu_utils.LOG.fatal('Table stat error')
        return

    # Parse "<db.table> <size>" lines into a dict of floats (bytes).
    dbtable_size_dict = {}
    for dbtable_size_str in output.splitlines():
        dbtable_size_list = dbtable_size_str.split(' ')
        assert(len(dbtable_size_list) == 2)
        dbtable_size_dict[dbtable_size_list[0]] = float(dbtable_size_list[1])
    # Optional per-cluster db->org overrides from the config file.
    known_db_org_dict = {}
    if 'special_db_org' in cluster_info.keys():
        known_db_org_dict = cluster_info['special_db_org']
    org_size_desc = get_org_size_desc_from_olap(cluster_name, dbtable_size_dict, known_db_org_dict)

    # Build one billing record per org for today's date.
    results = []
    date = time.strftime('%Y-%m-%d', time.localtime())
    period = int(time.mktime(datetime.datetime.strptime(date, "%Y-%m-%d").timetuple()))
    for org, size_desc in org_size_desc.iteritems():
        result = dict()
        result['period'] = period
        result['service_name'] = 'kudu'
        result['region_name'] = cluster_info['region']
        result['charge_type_name'] = cluster_info['charge_type']
        result['instance_name'] = cluster_info['instance']
        result['cluster'] = cluster_name
        # Orgs containing 'CL' are organization accounts; others are kerberos.
        result['account_type'] = 'org' if org.find('CL') != -1 else 'kerberos'
        result['account'] = org
        result['usage'] = size_desc['size']
        result['charge_object'] = size_desc['desc'].top_k()
        results.append(result)
    origin_usage_filename = get_cluster_stat_filename(date, cluster_name)
    with open(origin_usage_filename, 'w') as origin_usage_file:
        json.dump(results, origin_usage_file)
    # Redundant: the 'with' block already closed the file (close() on a
    # closed file is a no-op); kept as-is.
    origin_usage_file.close()

    g_commit_filenames.append(origin_usage_filename)
+
+
def get_cluster_info(cluster_name):
    """Look up a cluster's billing config.

    Returns the config dict, or None when the cluster is unknown or is a
    'public_share' cluster (which is not billed).
    """
    if cluster_name not in g_clusters_info:
        kudu_utils.LOG.fatal('Cluster %s not found' % cluster_name)
        return None

    info = g_clusters_info[cluster_name]
    if info['charge_type'] == 'public_share':
        kudu_utils.LOG.warning('Ignore public_share cluster %s' % cluster_name)
        return None

    return info
+
+
def collect_origin_usage_for_clusters(cluster_name_list):
    """Collect raw usage for every named cluster that has billing config."""
    for name in cluster_name_list:
        info = get_cluster_info(name)
        if info:
            collect_origin_usage_for_cluster(name, info)
+
+
def calc_usage_result(origin_usage_filename, service_usage_file):
    """Append one usage row per record of a per-cluster JSON stat file.

    Missing input files are logged and skipped. Rows are written to the
    already-open `service_usage_file` handle.
    """
    kudu_utils.LOG.info('Start to process daily statistics file %s' % origin_usage_filename)
    if not os.path.exists(origin_usage_filename):
        kudu_utils.LOG.error('File not exist')
        return

    row_format = ('%s, %s, %s, %s, %s, %s, %s, %s, \'{"storage_bytes":%d}\', \'%s\'\n')
    with open(origin_usage_filename, 'r') as origin_usage_file:
        for record in json.load(origin_usage_file):
            service_usage_file.write(row_format % (record['period'],
                                                   record['service_name'],
                                                   record['region_name'],
                                                   record['charge_type_name'],
                                                   record['instance_name'],
                                                   record['cluster'],
                                                   record['account_type'],
                                                   record['account'],
                                                   record['usage'],
                                                   json.dumps(record['charge_object'])))
    kudu_utils.LOG.info('Write to file finished')
+
+
def calc_usage_result_for_cluster(service_usage_file, cluster_name, date):
    """Process the raw stat file of one (cluster, date) pair."""
    calc_usage_result(get_cluster_stat_filename(date, cluster_name), service_usage_file)
+
+
def calc_usage_result_for_clusters(cluster_name_list, date_list):
    """Build one aggregated usage file per date, upload and queue it for commit."""
    header = ('period, service_name, region_name, charge_type_name, instance_name, '
              'cluster, account_type, account, usage, charge_object\n')
    for date in date_list:
        service_usage_filename = get_service_usage_filename(date)
        with open(service_usage_filename, 'w') as out:
            # Write header
            out.write(header)
            for name in cluster_name_list:
                if not get_cluster_info(name):
                    continue
                calc_usage_result_for_cluster(out, name, date)
        kudu_utils.upload_usage_data('append', service_usage_filename)
        g_commit_filenames.append(service_usage_filename)
+
+
def push_file_to_repo(filenames):
    """Commit the given files to the configured git repo and push.

    Pulls first so the push applies on top of the remote head.
    """
    repo = Repo(g_git_repo_dir)
    assert not repo.bare

    origin = repo.remote()
    origin.pull()

    repo.index.add(filenames)
    repo.index.commit('Kudu add statistics files')

    origin.push()

    kudu_utils.LOG.info('Pushed files %s to repo' % str(filenames))
+
+
def main(argv=None):
    """Entry point: collect usage, aggregate, and push result files to git.

    With no argument, all configured clusters are processed; with one
    argument, only the named cluster.
    """
    # Refuse to run without a configured git checkout to publish results to.
    if not os.path.exists(g_git_repo_dir + '/.git'):
        kudu_utils.LOG.fatal('You must set `g_git_repo_dir` to a valid directory contains `.git`')
        return

    if argv is None:
        argv = sys.argv

    cluster_name_list = []
    if len(argv) == 1:
        # Calculate all clusters
        cluster_name_list = list(g_clusters_info.iterkeys())
    elif len(argv) == 2:
        # Calculate specified cluster
        cluster_name_list.append(argv[1])
    else:
        kudu_utils.LOG.fatal('Usage: $0 [cluster_name]')
        return

    collect_origin_usage_for_clusters(cluster_name_list)

    # date_list = kudu_utils.get_date_list('2019-06-01', kudu_utils.get_date())
    date_list = [kudu_utils.get_date()]
    calc_usage_result_for_clusters(cluster_name_list, date_list)

    push_file_to_repo(g_commit_filenames)


if __name__ == "__main__":
    main()
diff --git a/src/kudu/scripts/falcon_screen.json b/src/kudu/scripts/falcon_screen.json
new file mode 100644
index 0000000..b15125c
--- /dev/null
+++ b/src/kudu/scripts/falcon_screen.json
@@ -0,0 +1,603 @@
+{
+ "comments": [
+ {
+ "screen": "screen名称",
+ "graphs": [
+ {
+ "title": "graph名称",
+ "endpoints": ["机器名或者tag标识,tag之间用空格分隔"],
+ "counters": ["counters名称,多个用逗号分隔。可以使用模糊匹配,支持metric(metric和tags(可选)空格隔开),精确匹配"],
+ "graph_type": "展示类型,endpoint视角为h,counters视角为k,组合视角为a",
+ "method": "绘图是否进行求和,求和填写sum,不求和填写空字符串",
+ "timespan": "展示的时间跨度,单位为秒"
+ }
+ ]
+ }
+ ],
+ "version": "20180827",
+ "counter_templates": {
+ "full": [
+ "metric=kudu-tserver-health service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=kudu-table-health service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=all_transactions_inflight service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=alter_schema_transactions_inflight service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=average_diskrowset_height service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=bloom_lookups_per_op_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=bloom_lookups service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=bytes_flushed service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=commit_wait_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=compact_rs_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=compact_rs_running service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=delta_file_lookups_per_op_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=delta_file_lookups service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=delta_major_compact_rs_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=delta_major_compact_rs_running service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=delta_minor_compact_rs_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=delta_minor_compact_rs_running service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=failed_elections_since_stable_leader service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=flush_dms_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=flush_dms_running service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=flush_mrs_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=flush_mrs_running service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=follower_memory_pressure_rejections service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=in_progress_ops service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=insertions_failed_dup_key service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=key_file_lookups_per_op_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=key_file_lookups service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=leader_memory_pressure_rejections service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_append_latency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_bytes_logged service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_cache_num_ops service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_cache_size service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_entry_batches_per_group_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_gc_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_gc_running service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_group_commit_latency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_reader_bytes_read service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_reader_entries_read service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_reader_read_batch_latency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_roll_latency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_sync_latency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=majority_done_ops service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=memrowset_size service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=mrs_lookups service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=num_rowsets_on_disk service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=on_disk_data_size service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=on_disk_size service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=op_prepare_queue_length_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=op_prepare_queue_time_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=op_prepare_run_time_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=ops_behind_leader service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=raft_term service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=replica_count service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=rows_deleted service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=rows_inserted service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=rows_updated service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=rows_upserted service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scanner_bytes_returned service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scanner_bytes_scanned_from_disk service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scanner_cells_returned service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scanner_cells_scanned_from_disk service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scanner_rows_returned service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scanner_rows_scanned service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scans_started service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=snapshot_read_inflight_wait_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=state service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=tablet_active_scanners service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=time_since_last_leader_heartbeat service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=transaction_memory_pressure_rejections service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=undo_delta_block_estimated_retained_bytes service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=undo_delta_block_gc_bytes_deleted service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=undo_delta_block_gc_delete_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=undo_delta_block_gc_init_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=undo_delta_block_gc_perform_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=undo_delta_block_gc_running service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=upserts_as_updates service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=write_op_duration_client_propagated_consistency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=write_op_duration_commit_wait_consistency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=write_transactions_inflight service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "table_ab": [
+ "metric=all_transactions_inflight service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=alter_schema_transactions_inflight service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=average_diskrowset_height service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=bloom_lookups_per_op_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=bloom_lookups service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=bytes_flushed service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "table_c": [
+ "metric=commit_wait_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=compact_rs_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=compact_rs_running service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "table_d": [
+ "metric=delta_file_lookups service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=delta_file_lookups_per_op_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=delta_major_compact_rs_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=delta_major_compact_rs_running service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=delta_minor_compact_rs_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=delta_minor_compact_rs_running service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "table_f": [
+ "metric=failed_elections_since_stable_leader service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=flush_dms_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=flush_dms_running service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=flush_mrs_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=flush_mrs_running service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=follower_memory_pressure_rejections service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "table_ghijk": [
+ "metric=in_progress_ops service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=insertions_failed_dup_key service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=key_file_lookups service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=key_file_lookups_per_op_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=kudu-table-health service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "table_l": [
+ "metric=leader_memory_pressure_rejections service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_append_latency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_bytes_logged service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_cache_num_ops service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_cache_size service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_entry_batches_per_group_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_gc_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_gc_running service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_group_commit_latency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_reader_bytes_read service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_reader_entries_read service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_reader_read_batch_latency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_roll_latency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=log_sync_latency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=lth service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "table_mn": [
+ "metric=majority_done_ops service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=memrowset_size service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=mrs_lookups service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=num_rowsets_on_disk service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "table_o": [
+ "metric=on_disk_data_size service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=on_disk_size service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=op_prepare_queue_length_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=op_prepare_queue_time_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=op_prepare_run_time_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=ops_behind_leader service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "table_r": [
+ "metric=raft_term service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=replica_count service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=rows_deleted service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=rows_inserted service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=rows_updated service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=rows_upserted service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "table_s": [
+ "metric=scanner_bytes_returned service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scanner_bytes_scanned_from_disk service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scanner_cells_returned service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scanner_cells_scanned_from_disk service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scanner_rows_returned service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scanner_rows_scanned service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=scans_started service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=snapshot_read_inflight_wait_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=state service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "table_u": [
+ "metric=undo_delta_block_estimated_retained_bytes service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=undo_delta_block_gc_bytes_deleted service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=undo_delta_block_gc_delete_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=undo_delta_block_gc_init_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=undo_delta_block_gc_perform_duration_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=undo_delta_block_gc_running service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=upserts_as_updates service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "table_tw": [
+ "metric=tablet_active_scanners service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=time_since_last_leader_heartbeat service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=transaction_memory_pressure_rejections service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=write_op_duration_client_propagated_consistency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=write_op_duration_commit_wait_consistency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=write_transactions_inflight service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "replica_count" : [
+ "metric=replica_count service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "cluster" : [
+ "metric=kudu.success service=kudu level=${level}",
+ "metric=kudu.writeLatency service=kudu level=${level}",
+ "metric=kudu.scanLatency service=kudu level=${level}",
+ "metric=healthy_table_proportion service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "cluster_stat" : [
+ "metric=masters_count service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=tservers_count service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=tables_count service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=tablets_count service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=replicas_count service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=on_disk_size service=kudu cluster=${cluster.name} level=${level} v=4",
+ "metric=on_disk_data_size service=kudu cluster=${cluster.name} level=${level} v=4"
+ ],
+ "sys" : [
+ "cpu.busy",
+ "load.15min",
+ "load.1min",
+ "load.5min",
+ "mem.memused",
+ "mem.memused.percent",
+ "net.if.in.bytes/iface=eth0",
+ "net.if.out.bytes/iface=eth0",
+ "net.if.total.dropped/iface=eth0",
+ "sys.ntp.offset/procname=chronyd"
+ ],
+ "disk_usage_percent" : [
+ "df.bytes.used.percent/fstype=ext4,mount=/home",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/ssd1",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/ssd2",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/ssd3",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/ssd4",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/ssd5",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/ssd6",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/ssd7",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/ssd8",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/ssd9",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/ssd10",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/ssd11",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/ssd12",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/hdd1",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/hdd2",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/hdd3",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/hdd4",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/hdd5",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/hdd6",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/hdd7",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/hdd8",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/hdd9",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/hdd10",
+ "df.bytes.used.percent/fstype=ext4,mount=/home/work/hdd11"
+ ],
+ "disk_usage_size" : [
+ "df.bytes.used/fstype=ext4,mount=/home",
+ "df.bytes.used/fstype=ext4,mount=/home/work/ssd1",
+ "df.bytes.used/fstype=ext4,mount=/home/work/ssd2",
+ "df.bytes.used/fstype=ext4,mount=/home/work/ssd3",
+ "df.bytes.used/fstype=ext4,mount=/home/work/ssd4",
+ "df.bytes.used/fstype=ext4,mount=/home/work/ssd5",
+ "df.bytes.used/fstype=ext4,mount=/home/work/ssd6",
+ "df.bytes.used/fstype=ext4,mount=/home/work/ssd7",
+ "df.bytes.used/fstype=ext4,mount=/home/work/ssd8",
+ "df.bytes.used/fstype=ext4,mount=/home/work/ssd9",
+ "df.bytes.used/fstype=ext4,mount=/home/work/ssd10",
+ "df.bytes.used/fstype=ext4,mount=/home/work/ssd11",
+ "df.bytes.used/fstype=ext4,mount=/home/work/ssd12",
+ "df.bytes.used/fstype=ext4,mount=/home/work/hdd1",
+ "df.bytes.used/fstype=ext4,mount=/home/work/hdd2",
+ "df.bytes.used/fstype=ext4,mount=/home/work/hdd3",
+ "df.bytes.used/fstype=ext4,mount=/home/work/hdd4",
+ "df.bytes.used/fstype=ext4,mount=/home/work/hdd5",
+ "df.bytes.used/fstype=ext4,mount=/home/work/hdd6",
+ "df.bytes.used/fstype=ext4,mount=/home/work/hdd7",
+ "df.bytes.used/fstype=ext4,mount=/home/work/hdd8",
+ "df.bytes.used/fstype=ext4,mount=/home/work/hdd9",
+ "df.bytes.used/fstype=ext4,mount=/home/work/hdd10",
+ "df.bytes.used/fstype=ext4,mount=/home/work/hdd11"
+ ],
+ "disk_io_util" : [
+ "disk.io.util/device=sdb",
+ "disk.io.util/device=sdc",
+ "disk.io.util/device=sdd",
+ "disk.io.util/device=sde",
+ "disk.io.util/device=sdf",
+ "disk.io.util/device=sdg",
+ "disk.io.util/device=sdh",
+ "disk.io.util/device=sdi",
+ "disk.io.util/device=sdj",
+ "disk.io.util/device=sdk",
+ "disk.io.util/device=sdl",
+ "disk.io.util/device=sdm",
+ "disk.io.util/device=nvme0n1",
+ "disk.io.util/device=nvme1n1",
+ "disk.io.util/device=nvme2n1",
+ "disk.io.util/device=nvme3n1",
+ "disk.io.util/device=nvme4n1",
+ "disk.io.util/device=nvme5n1",
+ "disk.io.util/device=xvda",
+ "disk.io.util/device=xvdb",
+ "disk.io.util/device=xvdc",
+ "disk.io.util/device=xvdd",
+ "disk.io.util/device=xvde",
+ "disk.io.util/device=xvdf",
+ "disk.io.util/device=vda",
+ "disk.io.util/device=vdb",
+ "disk.io.util/device=vdc",
+ "disk.io.util/device=vdd",
+ "disk.io.util/device=vde"
+ ]
+ },
+ "details": [
+ {
+ "screen": "${cluster.name} [cluster]",
+ "graphs": [
+ {
+ "title": "集群可用度",
+ "endpoints": ["${cluster.name}"],
+ "counters": {
+ "level": "cluster",
+ "template": "cluster"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ },
+ {
+ "title": "集群统计信息",
+ "endpoints": ["${cluster.name}"],
+ "counters": {
+ "level": "cluster",
+ "template": "cluster_stat"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [metrics_ab]",
+ "graphs": [
+ {
+ "title": "单表metrics",
+ "endpoints": ["${for.each.table}"],
+ "counters": {
+ "level": "table",
+ "template": "table_ab"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [metrics_c]",
+ "graphs": [
+ {
+ "title": "单表metrics",
+ "endpoints": ["${for.each.table}"],
+ "counters": {
+ "level": "table",
+ "template": "table_c"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [metrics_d]",
+ "graphs": [
+ {
+ "title": "单表metrics",
+ "endpoints": ["${for.each.table}"],
+ "counters": {
+ "level": "table",
+ "template": "table_d"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [metrics_f]",
+ "graphs": [
+ {
+ "title": "单表metrics",
+ "endpoints": ["${for.each.table}"],
+ "counters": {
+ "level": "table",
+ "template": "table_f"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [metrics_ghijk]",
+ "graphs": [
+ {
+ "title": "单表metrics",
+ "endpoints": ["${for.each.table}"],
+ "counters": {
+ "level": "table",
+ "template": "table_ghijk"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [metrics_l]",
+ "graphs": [
+ {
+ "title": "单表metrics",
+ "endpoints": ["${for.each.table}"],
+ "counters": {
+ "level": "table",
+ "template": "table_l"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [metrics_mn]",
+ "graphs": [
+ {
+ "title": "单表metrics",
+ "endpoints": ["${for.each.table}"],
+ "counters": {
+ "level": "table",
+ "template": "table_mn"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [metrics_o]",
+ "graphs": [
+ {
+ "title": "单表metrics",
+ "endpoints": ["${for.each.table}"],
+ "counters": {
+ "level": "table",
+ "template": "table_o"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [metrics_r]",
+ "graphs": [
+ {
+ "title": "单表metrics",
+ "endpoints": ["${for.each.table}"],
+ "counters": {
+ "level": "table",
+ "template": "table_r"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [metrics_s]",
+ "graphs": [
+ {
+ "title": "单表metrics",
+ "endpoints": ["${for.each.table}"],
+ "counters": {
+ "level": "table",
+ "template": "table_s"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [metrics_u]",
+ "graphs": [
+ {
+ "title": "单表metrics",
+ "endpoints": ["${for.each.table}"],
+ "counters": {
+ "level": "table",
+ "template": "table_u"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [metrics_tw]",
+ "graphs": [
+ {
+ "title": "单表metrics",
+ "endpoints": ["${for.each.table}"],
+ "counters": {
+ "level": "table",
+ "template": "table_tw"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [tserver]",
+ "graphs": [
+ {
+ "title": "单节点metrics",
+ "endpoints": ["${for.each.tserver}"],
+ "counters": {
+ "level": "host",
+ "template": "full"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ },
+ {
+ "screen": "${cluster.name} [server-sys]",
+ "graphs": [
+ {
+ "title": "单节点sys指标",
+ "endpoints": ["${for.each.tserver}", "${for.each.master}"],
+ "counters": {
+ "level": "host",
+ "template": "sys"
+ },
+ "graph_type": "h",
+ "method": "",
+ "timespan": 86400
+ },
+ {
+ "title": "磁盘用量(百分比)",
+ "endpoints": ["${for.each.tserver}", "${for.each.master}"],
+ "counters": {
+ "level": "host",
+ "template": "disk_usage_percent"
+ },
+ "graph_type": "a",
+ "method": "",
+ "timespan": 86400
+ },
+ {
+ "title": "磁盘用量(占用空间)",
+ "endpoints": ["${for.each.tserver}", "${for.each.master}"],
+ "counters": {
+ "level": "host",
+ "template": "disk_usage_size"
+ },
+ "graph_type": "a",
+ "method": "",
+ "timespan": 86400
+ },
+ {
+ "title": "磁盘IO util",
+ "endpoints": ["${for.each.tserver}", "${for.each.master}"],
+ "counters": {
+ "level": "host",
+ "template": "disk_io_util"
+ },
+ "graph_type": "a",
+ "method": "",
+ "timespan": 86400
+ }
+ ]
+ }
+ ]
+}
diff --git a/src/kudu/scripts/falcon_screen.py b/src/kudu/scripts/falcon_screen.py
new file mode 100755
index 0000000..26f330a
--- /dev/null
+++ b/src/kudu/scripts/falcon_screen.py
@@ -0,0 +1,603 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import requests
+import json
+import re
+import sys
+
+#
+# RESTful API doc: http://wiki.n.miui.com/pages/viewpage.action?pageId=66037692
+# falcon ctrl api: http://dev.falcon.srv/doc/
+#
+
# account info
# NOTE(review): credentials are intentionally blank in the source; fill in
# the service account name and the MD5 of its seed before running.
serviceAccount = ""
serviceSeedMd5 = ""

###############################################################################

# global variables
falconServiceUrl = "http://falcon.srv"
# falconServiceUrl = "http://dev.falcon.srv"
kuduScreenId = 25748
# Falcon parent-screen ids for the four kudu dashboard groups.
KUDU_CLUSTER_ID = 37613
KUDU_TABLES_ID = 37638
KUDU_TSERVER_ID = 37639
KUDU_SYS_ID = 37640
# Maps a parent screen id to the screen-name suffix(es) created under it;
# KUDU_TABLES_ID fans out into one screen per metric group (matching the
# "details" entries in falcon_screen.json).
screenIdList = {
    KUDU_CLUSTER_ID: "[cluster]",
    KUDU_TABLES_ID: [
        "[metrics_ab]",
        "[metrics_c]",
        "[metrics_d]",
        "[metrics_f]",
        "[metrics_ghijk]",
        "[metrics_l]",
        "[metrics_mn]",
        "[metrics_o]",
        "[metrics_r]",
        "[metrics_s]",
        "[metrics_u]",
        "[metrics_tw]"],
    KUDU_TSERVER_ID: "[tserver]",
    KUDU_SYS_ID: "[server-sys]"}
# kuduScreenId = 351
# Session cookie value; populated by get_session_id() at login time.
sessionId = ""
# NOTE(review): the three ports below are not used in the visible portion of
# this script -- presumably consumed further down; confirm before removing.
metaPort = ""
replicaPort = ""
collectorPort = ""
+
+
+# return:
+def get_session_id():
+ url = falconServiceUrl + "/v1.0/auth/info"
+ headers = {
+ "Accept": "text/plain"
+ }
+
+ r = requests.get(url, headers=headers)
+ if r.status_code != 200:
+ print(
+ "ERROR: get_session_id failed, status_code = %s, result:\n%s" %
+ (r.status_code, r.text))
+ sys.exit(1)
+
+ c = r.headers['Set-Cookie']
+ m = re.search('falconSessionId=([^;]+);', c)
+ if m:
+ global sessionId
+ sessionId = m.group(1)
+ print("INFO: sessionId =", sessionId)
+ else:
+ print("ERROR: get_session_id failed, cookie not set")
+ sys.exit(1)
+
+
+# return:
+def auth_by_misso():
+ url = falconServiceUrl + "/v1.0/auth/callback/misso"
+ headers = {
+ "Cookie": "falconSessionId=" +
+ sessionId,
+ "Authorization": serviceAccount +
+ ";" +
+ serviceSeedMd5 +
+ ";" +
+ serviceSeedMd5}
+
+ r = requests.get(url, headers=headers)
+ if r.status_code != 200:
+ print(
+ "ERROR: auth_by_misso failed, status_code = %s, result:\n%s" %
+ (r.status_code, r.text))
+ sys.exit(1)
+
+
+# return:
+def check_auth_info():
+ url = falconServiceUrl + "/v1.0/auth/info"
+ headers = {
+ "Cookie": "falconSessionId=" + sessionId
+ }
+
+ r = requests.get(url, headers=headers)
+ if r.status_code != 200:
+ print(
+ "ERROR: check_auth_info failed, status_code = %s, result:\n%s" %
+ (r.status_code, r.text))
+ sys.exit(1)
+
+ j = json.loads(r.text)
+ if "user" not in j or j["user"] is None or "name" not in j["user"] or j["user"]["name"] != serviceAccount:
+ print("ERROR: check_auth_info failed, bad json result:\n%s" % r.text)
+ sys.exit(1)
+
+
def login():
    """Full login flow: obtain a session id, authenticate it via misso,
    then sanity-check the resulting auth info."""
    for step in (get_session_id, auth_by_misso, check_auth_info):
        step()
    print("INFO: login succeed")
+
+
+# return:
+def logout():
+ url = falconServiceUrl + "/v1.0/auth/logout"
+ headers = {
+ "Cookie": "falconSessionId=" + sessionId
+ }
+
+ r = requests.get(url, headers=headers)
+ if r.status_code != 200:
+ print(
+ "ERROR: logout failed, status_code = %s, result:\n%s" %
+ (r.status_code, r.text))
+ sys.exit(1)
+
+ print("INFO: logout succeed")
+
+
+# return: screenId
+def create_screen(screenName, scrid):
+ url = falconServiceUrl + "/v1.0/dashboard/screen"
+ headers = {
+ "Cookie": "falconSessionId=" + sessionId
+ }
+ req = {
+ "pid": scrid,
+ "name": screenName
+ }
+
+ r = requests.post(url, headers=headers, data=json.dumps(req))
+ if r.status_code != 200:
+ print(
+ "ERROR: create_screen failed, screenName = %s, status_code = %s, result:\n%s" %
+ (screenName, r.status_code, r.text))
+ sys.exit(1)
+
+ j = json.loads(r.text)
+ if "id" not in j:
+ print(
+ "ERROR: create_screen failed, screenName = %s, bad json result\n%s" %
+ (screenName, r.text))
+ sys.exit(1)
+
+ screenId = j["id"]
+ print(
+ "INFO: create_screen succeed, screenName = %s, screenId = %s" %
+ (screenName, screenId))
+ return screenId
+
+
def parse_lines(file_name):
    """Read `file_name` and return its non-empty lines, stripped of
    surrounding whitespace, in order. Exits the process if a duplicate
    line is found.

    Bug fix: the original called `line.strip()` and discarded the result
    (str.strip returns a new string), so every line kept its trailing
    newline and the emptiness check never filtered blank lines.
    """
    lines = []
    for raw in open(file_name):
        line = raw.strip()
        if len(line) > 0:
            if line in lines:
                print("ERROR: bad file: duplicate line '%s'" % line)
                sys.exit(1)
            lines.append(line)
    return lines
+
+
+# return: screenConfigs
+def prepare_screen_config(
+ clusterName,
+ templateName,
+ screenTemplateFile,
+ tableListFile,
+ masterListFile,
+ tserverListFile):
+ # tableList
+ tableList = parse_lines(tableListFile)
+ if len(tableList) == 0:
+ print("WARN: empty table list file, will not create table level falcon screen")
+
+ # masterList
+ masterList = parse_lines(masterListFile)
+ if len(masterList) == 0:
+ print("ERROR: bad master list file: should be non-empty list")
+ sys.exit(1)
+
+ # tserverList
+ tserverList = parse_lines(tserverListFile)
+ if len(tserverList) == 0:
+ print("ERROR: bad tserver list file: should be non-empty list")
+ sys.exit(1)
+
+ # template json
+ jsonData = json.loads(open(screenTemplateFile).read())
+ templateJson = jsonData['counter_templates']
+ screensJson = jsonData['details']
+ if not isinstance(screensJson, list) or len(screensJson) == 0:
+ print(
+ "ERROR: bad screen template json: [details] should be provided as non-empty list")
+ sys.exit(1)
+
+ screenConfigs = {}
+ for screenJson in screensJson:
+ # screen name
+ screen = screenJson["screen"]
+ if not isinstance(screen, (str, unicode)) or len(screen) == 0:
+ print(
+ "ERROR: bad json: [details][screen]: should be provided as non-empty str")
+ sys.exit(1)
+ screen = screen.replace("${cluster.name}", clusterName)
+ if screen in screenConfigs:
+ print("ERROR: duplicate screen '%s'" % screen)
+ sys.exit(1)
+
+ # graphs in screen
+ graphConfigs = []
+ position = 1
+ for graphJson in screenJson['graphs']:
+ # title
+ title = graphJson["title"]
+ if not isinstance(title, (str, unicode)) or len(title) == 0:
+ print(
+ "ERROR: bad json: [details][%s][graphs][%s]: [title] should be provided as non-empty str" %
+ (screen, title))
+ sys.exit(1)
+ if title in graphConfigs:
+ print("ERROR: duplicate title '%s'" % title)
+ sys.exit(1)
+
+ # endpoints
+ endpoints = graphJson["endpoints"]
+ newEndpoints = []
+ for endpoint in endpoints:
+ if len(endpoint) != 0:
+ if endpoint.find("${cluster.name}") != -1:
+ newEndpoints.append(
+ endpoint.replace(
+ "${cluster.name}",
+ clusterName))
+ elif endpoint.find("${for.each.master}") != -1:
+ newEndpoints += masterList
+ elif endpoint.find("${for.each.tserver}") != -1:
+ newEndpoints += tserverList
+ elif endpoint.find("${for.each.table}") != -1:
+ newEndpoints += tableList
+ else:
+ newEndpoints.append(endpoint)
+ newEndpoints = list(set(newEndpoints))
+ if len(newEndpoints) == 0:
+ print(
+ "WARN: bad json: [details][%s][graphs][%s]: [endpoints] should be provided as non-empty list" %
+ (screen, title))
+
+ # counters
+ newCounters = []
+ counters = graphJson["counters"]
+ if not isinstance(counters, dict) or len(counters) == 0:
+ print(
+ "ERROR: bad json: [details][%s][graphs][%s]: [counters] should be provided as non-empty list/dict" %
+ (screen, title))
+ sys.exit(1)
+ for counter in templateJson[counters["template"] if counters.has_key("template") else templateName]:
+ newCounters.append(
+ counter.replace(
+ "${cluster.name}",
+ clusterName). replace(
+ "${level}",
+ counters["level"]))
+ if len(newCounters) == 0:
+ print(
+ "ERROR: bad json: [details][%s][graphs][%s]: [counters] should be provided as non-empty list" %
+ (screen, title))
+ sys.exit(1)
+
+ # graphType
+ graphType = graphJson["graph_type"]
+ if not isinstance(graphType, (str, unicode)):
+ print(
+ "ERROR: bad json: [details][%s][graphs][%s]: [graph_type] should be provided as non-empty list" %
+ (screen, title))
+ sys.exit(1)
+ if graphType != "h" and graphType != "k" and graphType != "a":
+ print(
+ "ERROR: bad json: [details][%s][graphs][%s]: [graph_type] should be 'h' or 'k' or 'a'" %
+ (screen, title))
+ sys.exit(1)
+
+ # method
+ method = graphJson["method"]
+ if not isinstance(method, (str, unicode)):
+ print(
+ "ERROR: bad json: [details][%s][graphs][%s]: [method] should be provided as str" %
+ (screen, title))
+ sys.exit(1)
+ if method != "" and method != "sum":
+ print(
+ "ERROR: bad json: [details][%s][graphs][%s]: [method] should be '' or 'sum'" %
+ (screen, title))
+ sys.exit(1)
+
+ # timespan
+ timespan = graphJson["timespan"]
+ if not isinstance(timespan, int) or timespan <= 0:
+ print(
+ "ERROR: bad json: [details][%s][graphs][%s]: [timespan] should be provided as positive int" %
+ (screen, title))
+ sys.exit(1)
+
+ graphConfig = {}
+ graphConfig["counters"] = newCounters
+ graphConfig["endpoints"] = newEndpoints
+ graphConfig["falcon_tags"] = ""
+ graphConfig["graph_type"] = graphType
+ graphConfig["method"] = method
+ graphConfig["position"] = position
+ graphConfig["timespan"] = timespan
+ graphConfig["title"] = title
+ graphConfigs.append(graphConfig)
+
+ position += 1
+ screenConfigs[screen] = graphConfigs
+
+ return screenConfigs
+
+
+# return: graphId
+# POST a new graph to the falcon dashboard API, then immediately push its
+# position with update_graph.  Exits the whole process on any HTTP/JSON error.
+# graphConfig: dict built by prepare_screen_config; caller must have set
+# "screen_id" before calling.  Reads module globals falconServiceUrl and
+# sessionId (populated by login()).
+def create_graph(graphConfig):
+ url = falconServiceUrl + "/v1.0/dashboard/graph"
+ headers = {
+ "Cookie": "falconSessionId=" + sessionId
+ }
+
+ r = requests.post(url, headers=headers, data=json.dumps(graphConfig))
+ if r.status_code != 200:
+ print(
+ "ERROR: create_graph failed, graphTitle = \"%s\", status_code = %s, result:\n%s" %
+ (graphConfig["title"], r.status_code, r.text))
+ sys.exit(1)
+
+ j = json.loads(r.text)
+ if "id" not in j:
+ print(
+ "ERROR: create_graph failed, graphTitle = \"%s\", bad json result\n%s" %
+ (graphConfig["title"], r.text))
+ sys.exit(1)
+
+ graphId = j["id"]
+ print("INFO: create_graph succeed, graphTitle = \"%s\", graphId = %s"
+ % (graphConfig["title"], graphId))
+
+ # update graph position immediately (falcon assigns position on update)
+ graphConfig["id"] = graphId
+ update_graph(graphConfig, "position")
+
+ return graphId
+
+
+# return: screen[]
+# Fetch every child screen under the parent dashboard screen `scrid` from the
+# falcon API.  Returns the decoded JSON list; exits the process on HTTP error.
+def get_kudu_screens(scrid):
+ url = falconServiceUrl + "/v1.0/dashboard/screen/pid/" + str(scrid)
+ headers = {
+ "Cookie": "falconSessionId=" + sessionId
+ }
+
+ r = requests.get(url, headers=headers)
+ if r.status_code != 200:
+ print(
+ "ERROR: get_kudu_screens failed, status_code = %s, result:\n%s" %
+ (r.status_code, r.text))
+ sys.exit(1)
+
+ j = json.loads(r.text)
+
+ print("INFO: get_kudu_screens succeed, screenCount = %s" % len(j))
+ return j
+
+
+# return: graph[]
+# Fetch all graphs currently configured on the given falcon screen.
+# screenName is only used for log/error messages.  Returns the decoded JSON
+# list; exits the process on HTTP error.
+def get_screen_graphs(screenName, screenId):
+ url = falconServiceUrl + "/v1.0/dashboard/graph/screen/" + str(screenId)
+ headers = {
+ "Cookie": "falconSessionId=" + sessionId
+ }
+
+ r = requests.get(url, headers=headers)
+ if r.status_code != 200:
+ print(
+ "ERROR: get_screen_graphs failed, screenName = %s, screenId = %s, status_code = %s, result:\n%s" %
+ (screenName, screenId, r.status_code, r.text))
+ sys.exit(1)
+
+ j = json.loads(r.text)
+
+ print(
+ "INFO: get_screen_graphs succeed, screenName = %s, screenId = %s, graphCount = %s" %
+ (screenName, screenId, len(j)))
+ return j
+
+
+# return:
+# Delete one graph by id.  The falcon API returns HTTP 200 with a body
+# containing "delete success!" on success, so both are checked; exits the
+# process on failure.  graphTitle is only used for log/error messages.
+def delete_graph(graphTitle, graphId):
+ url = falconServiceUrl + "/v1.0/dashboard/graph/" + str(graphId)
+ headers = {
+ "Cookie": "falconSessionId=" + sessionId
+ }
+
+ r = requests.delete(url, headers=headers)
+ if r.status_code != 200 or r.text.find("delete success!") == -1:
+ print(
+ "ERROR: delete_graph failed, graphTitle = \"%s\", graphId = %s, status_code = %s, result:\n%s" %
+ (graphTitle, graphId, r.status_code, r.text))
+ sys.exit(1)
+
+ print(
+ "INFO: delete_graph succeed, graphTitle = \"%s\", graphId = %s" %
+ (graphTitle, graphId))
+
+
+# return:
+# PUT an updated graph config (must contain "id") to falcon.  updateReason is
+# a short human-readable string logged as "<reason> changed".  Exits the
+# process on HTTP error or when the response JSON lacks an "id".
+def update_graph(graphConfig, updateReason):
+ url = falconServiceUrl + "/v1.0/dashboard/graph/" + str(graphConfig["id"])
+ headers = {
+ "Cookie": "falconSessionId=" + sessionId
+ }
+
+ r = requests.put(url, headers=headers, data=json.dumps(graphConfig))
+ if r.status_code != 200:
+ print(
+ "ERROR: update_graph failed, graphTitle = \"%s\", graphId = %s, status_code = %s, result:\n%s" %
+ (graphConfig["title"], graphConfig["id"], r.status_code, r.text))
+ sys.exit(1)
+
+ j = json.loads(r.text)
+ if "id" not in j:
+ print(
+ "ERROR: update_graph failed, graphTitle = \"%s\", graphId = %s, bad json result\n%s" %
+ (graphConfig["title"], graphConfig["id"], r.text))
+ sys.exit(1)
+
+ print(
+ "INFO: update_graph succeed, graphTitle = \"%s\", graphId = %s, updateReason = \"%s changed\"" %
+ (graphConfig["title"], graphConfig["id"], updateReason))
+
+
+# return: bool, reason
+def is_equal(graph1, graph2):
+ if graph1["title"] != graph2["title"]:
+ return False, "title"
+ if graph1["graph_type"] != graph2["graph_type"]:
+ return False, "graph_type"
+ if graph1["method"] != graph2["method"]:
+ return False, "method"
+ if graph1["position"] != graph2["position"]:
+ return False, "position"
+ if graph1["timespan"] != graph2["timespan"]:
+ return False, "timespan"
+ endpoints1 = graph1["endpoints"]
+ endpoints2 = graph2["endpoints"]
+ if len(endpoints1) != len(endpoints2):
+ return False, "endpoints"
+ for endpoint in endpoints1:
+ if endpoint not in endpoints2:
+ return False, "endpoints"
+ counters1 = graph1["counters"]
+ counters2 = graph2["counters"]
+ if len(counters1) != len(counters2):
+ return False, "counters"
+ for counter in counters1:
+ if counter not in counters2:
+ return False, "counters"
+ return True, ""
+
+
+# Create a brand-new falcon screen named screenName under parent screen
+# `scrid`, then create every graph in graphConfigs on it.  create_screen is
+# defined elsewhere in this file; create_graph exits on failure.
+def create_screen_and_graphs(screenName, scrid, graphConfigs):
+
+ # create screen
+ screenId = create_screen(screenName, scrid)
+ for graphConfig in graphConfigs:
+ graphConfig["screen_id"] = screenId
+ create_graph(graphConfig)
+ print("INFO: %s graphs created for %s" % (len(graphConfigs), screenName))
+
+
+def update_screen_and_graphs(screenName, screenId, graphConfigs):
+ oldGraphConfigs = get_screen_graphs(screenName, screenId)
+ if oldGraphConfigs is None:
+ print(
+ "ERROR: screen '%s' not exit, please create it first" %
+ clusterName)
+ sys.exit(1)
+
+ # list -> dict
+ oldGraphConfigsDict = {}
+ newGraphConfigsDict = {}
+ for graph in oldGraphConfigs:
+ oldGraphConfigsDict[graph["title"]] = graph
+ for graph in graphConfigs:
+ newGraphConfigsDict[graph["title"]] = graph
+
+ deleteConfigList = []
+ createConfigList = []
+ updateConfigList = []
+ for graph in oldGraphConfigs:
+ if not graph["title"] in newGraphConfigsDict:
+ deleteConfigList.append((graph["title"], graph["graph_id"]))
+ for graph in graphConfigs:
+ if not graph["title"] in oldGraphConfigsDict:
+ graph["screen_id"] = screenId
+ createConfigList.append(graph)
+ else:
+ oldGraph = oldGraphConfigsDict[graph["title"]]
+ equal, reason = is_equal(graph, oldGraph)
+ if not equal:
+ graph["id"] = oldGraph["graph_id"]
+ graph["screen_id"] = screenId
+ updateConfigList.append((graph, reason))
+
+ for graphTitle, graphId in deleteConfigList:
+ delete_graph(graphTitle, graphId)
+ for graph in createConfigList:
+ create_graph(graph)
+ for graph, reason in updateConfigList:
+ update_graph(graph, reason)
+
+ print("INFO: %d graphs deleted, %d graphs created, %d graphs updated" %
+ (len(deleteConfigList), len(createConfigList), len(updateConfigList)))
+
+
+# Entry point: log in to falcon, then for each configured parent screen id
+# (screenIdList, defined elsewhere in this file) create or update the
+# per-cluster screens and their graphs, and log out.
+if __name__ == '__main__':
+ # serviceAccount / serviceSeedMd5 are module-level credentials that must be
+ # filled in by the operator before running.
+ if serviceAccount == "" or serviceSeedMd5 == "":
+ print(
+ "ERROR: please set 'serviceAccount' and 'serviceSeedMd5' in %s" %
+ sys.argv[0])
+ sys.exit(1)
+
+ if len(sys.argv) != 7:
+ print(
+ "USAGE: python %s <cluster_name> <template_name> <screen_template_file> <master_list_file> <tserver_list_file> <table_list_file>" %
+ sys.argv[0])
+ sys.exit(1)
+
+ clusterName = sys.argv[1]
+ templateName = sys.argv[2]
+ screenTemplateFile = sys.argv[3]
+ masterListFile = sys.argv[4]
+ tserverListFile = sys.argv[5]
+ tableListFile = sys.argv[6]
+
+ login()
+
+ for scrid, scrNames in screenIdList.items():
+ oldKuduScreens = get_kudu_screens(scrid)
+ oldScreenName2Id = {}
+ screenConfigs = prepare_screen_config(
+ clusterName,
+ templateName,
+ screenTemplateFile,
+ tableListFile,
+ masterListFile,
+ tserverListFile)
+ for oldScreen in oldKuduScreens:
+ oldScreenName2Id[oldScreen['name']] = oldScreen['id']
+ # KUDU_TABLES_ID screens carry a list of sub-screen names; all other
+ # parent ids map to a single screen name string.
+ if scrid == KUDU_TABLES_ID:
+ for scrName in scrNames:
+ screenName = clusterName + " " + scrName
+ graphConfigs = screenConfigs[screenName]
+ if screenName not in oldScreenName2Id:
+ # create screen
+ create_screen_and_graphs(screenName, scrid, graphConfigs)
+ else:
+ # update screen
+ screenId = oldScreenName2Id[screenName]
+ update_screen_and_graphs(
+ screenName, screenId, graphConfigs)
+ else:
+ screenName = clusterName + " " + scrNames
+ graphConfigs = screenConfigs[screenName]
+ if screenName not in oldScreenName2Id:
+ # create screen
+ create_screen_and_graphs(screenName, scrid, graphConfigs)
+ else:
+ # update screen
+ screenId = oldScreenName2Id[screenName]
+ update_screen_and_graphs(screenName, screenId, graphConfigs)
+
+ logout()
diff --git a/src/kudu/scripts/kudu_falcon_screen.sh b/src/kudu/scripts/kudu_falcon_screen.sh
new file mode 100755
index 0000000..046352b
--- /dev/null
+++ b/src/kudu/scripts/kudu_falcon_screen.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+
+PID=$$
+BASE_DIR="$( cd "$( dirname "$0" )" && pwd )"
+KUDU=${KUDU_HOME}/kudu
+COLLECTOR=${KUDU_HOME}/kudu_collector
+if [[ ! -f ${KUDU} || ! -f ${COLLECTOR} ]]; then
+ echo "ERROR: ${KUDU} or ${COLLECTOR} not found"
+ exit 1
+fi
+KUDURC=${KUDU_CONFIG}/kudurc
+if [[ ! -f ${KUDURC} ]]; then
+ echo "ERROR: ${KUDURC} not found"
+ exit 1
+fi
+
+function usage() {
+cat << EOF
+This tool is for update falcon screen for specified kudu cluster.
+USAGE: $0 <cluster_name> [table_count] [metrics_template]
+ cluster_name Cluster name operated on, should be configurated in $KUDU_CONFIG/kudurc
+ table_count An indicator of how many tables will be monitored, actual monitored table count is in range [table_count, 3*table_count]
+ metrics_template Which metric template will be used, 'simple' or 'full'
+EOF
+}
+
+if [[ $# -lt 1 ]]
+then
+ usage
+ exit 1
+fi
+
+CLUSTER=$1
+TABLE_COUNT=9999
+if [[ $# -ge 2 ]]
+then
+ TABLE_COUNT=$2
+fi
+
+TEMPLATE_NAME='full'
+if [[ $# -ge 3 ]]
+then
+ TEMPLATE_NAME=$3
+fi
+if [[ "${TEMPLATE_NAME}"x != "simple"x && "${TEMPLATE_NAME}"x != "full"x ]]
+then
+ usage
+ exit 1
+fi
+
+echo "UID: ${UID}"
+echo "PID: ${PID}"
+echo "cluster: ${CLUSTER}"
+echo "top n table: ${TABLE_COUNT}"
+echo "metric template: ${TEMPLATE_NAME}"
+echo "Start time: `date`"
+ALL_START_TIME=$((`date +%s`))
+echo
+
+# get master list
+${KUDU} master list @${CLUSTER} -format=space | awk -F' |:' '{print $2}' | sort -n &>/tmp/${UID}.${PID}.kudu.master.list
+if [[ $? -ne 0 ]]; then
+ echo "`kudu master list @${CLUSTER} -format=space` failed"
+ exit $?
+fi
+
+MASTER_COUNT=`cat /tmp/${UID}.${PID}.kudu.master.list | wc -l`
+if [[ ${MASTER_COUNT} -eq 0 ]]; then
+ echo "ERROR: master list is empty, please check the cluster ${CLUSTER}"
+ exit -1
+fi
+
+# get tserver list
+${KUDU} tserver list @${CLUSTER} -format=space | awk -F' |:' '{print $2}' | sort -n &>/tmp/${UID}.${PID}.kudu.tserver.list
+if [[ $? -ne 0 ]]; then
+ echo "`kudu tserver list @${CLUSTER} -format=space` failed"
+ exit $?
+fi
+
+TSERVER_COUNT=`cat /tmp/${UID}.${PID}.kudu.tserver.list | wc -l`
+if [[ ${TSERVER_COUNT} -eq 0 ]]; then
+ echo "ERROR: tserver list is empty, please check the cluster ${CLUSTER}"
+ exit 1
+fi
+
+function parse_yaml() {
+ python -c "import yaml;print(yaml.load(open('$1').read(), Loader=yaml.FullLoader)['clusters_info']['$2']['master_addresses'])"
+}
+MASTERS=$(parse_yaml ${KUDURC} ${CLUSTER})
+
+# get table list
+${COLLECTOR} -collector_master_addrs=${MASTERS} -collector_cluster_name=${CLUSTER} -collector_report_method=local -collector_metrics=bytes_flushed,on_disk_size,scanner_bytes_returned -log_dir=./log > /tmp/${UID}.${PID}.kudu.metric_table_value
+if [[ $? -ne 0 ]]; then
+ echo "ERROR: ${COLLECTOR} execute failed"
+ exit 1
+fi
+
+cat /tmp/${UID}.${PID}.kudu.metric_table_value | egrep "^table bytes_flushed " | sort -rnk4 | head -n ${TABLE_COUNT} | awk '{print $3}' > /tmp/${UID}.${PID}.kudu.top.bytes_flushed
+cat /tmp/${UID}.${PID}.kudu.metric_table_value | egrep "^table on_disk_size " | sort -rnk4 | head -n ${TABLE_COUNT} | awk '{print $3}' > /tmp/${UID}.${PID}.kudu.top.on_disk_size
+cat /tmp/${UID}.${PID}.kudu.metric_table_value | egrep "^table scanner_bytes_returned " | sort -rnk4 | head -n ${TABLE_COUNT} | awk '{print $3}' > /tmp/${UID}.${PID}.kudu.top.scanner_bytes_returned
+cat /tmp/${UID}.${PID}.kudu.top.* | sort -n | uniq > /tmp/${UID}.${PID}.kudu.table.list
+echo "total `wc -l /tmp/${UID}.${PID}.kudu.table.list | awk '{print $1}'` tables to monitor"
+echo -e "\033[32m Please set the following one line to the kudu collector's \`collector_attributes\` argument manually\033[0m"
+echo -n "table_name:"
+awk BEGIN{RS=EOF}'{gsub(/\n/,",");print}' /tmp/${UID}.${PID}.kudu.table.list
+echo ""
+
+python ${BASE_DIR}/falcon_screen.py ${CLUSTER} ${TEMPLATE_NAME} ${BASE_DIR}/falcon_screen.json /tmp/${UID}.${PID}.kudu.master.list /tmp/${UID}.${PID}.kudu.tserver.list /tmp/${UID}.${PID}.kudu.table.list
+if [[ $? -ne 0 ]]; then
+ echo "ERROR: falcon screen operate failed"
+ exit 1
+fi
+
+echo
+echo "Finish time: `date`"
+ALL_FINISH_TIME=$((`date +%s`))
+echo "Falcon screen operate done, elapsed time is $((ALL_FINISH_TIME - ALL_START_TIME)) seconds."
+
+rm -f /tmp/${UID}.${PID}.kudu.* &>/dev/null
diff --git a/src/kudu/scripts/kudu_utils.py b/src/kudu/scripts/kudu_utils.py
new file mode 100755
index 0000000..8dc5dfa
--- /dev/null
+++ b/src/kudu/scripts/kudu_utils.py
@@ -0,0 +1,106 @@
+#! /usr/bin/env python
+# coding=utf-8
+
+import datetime
+import dateutil.relativedelta
+import logging
+from logging.handlers import RotatingFileHandler
+import os
+import requests
+
+# Shared module logger (configured by init_log at import time) and the single
+# timestamp used for all date computations in this run.
+LOG = logging.getLogger()
+g_time = datetime.datetime.now()
+
+
+# Configure the shared LOG with a size-rotated file handler writing to
+# log/kudu.log (100 MiB per file, 10 backups), creating log/ if needed.
+# Called once at module import (bottom of this file).
+def init_log():
+ if not os.path.exists('log/'):
+ os.makedirs('log')
+ handler = RotatingFileHandler('log/kudu.log',
+ mode='a',
+ maxBytes=100*1024*1024,
+ backupCount=10)
+ handler.setFormatter(
+ logging.Formatter(
+ fmt='%(asctime)s [%(thread)d] [%(levelname)s] %(filename)s:%(lineno)d %(message)s',
+ datefmt='%Y-%m-%d %H:%M:%S'))
+ LOG.addHandler(handler)
+ LOG.setLevel(logging.INFO)
+
+
+# mkdir that tolerates the directory already existing; re-raises any other
+# OSError.  NOTE: Python 2 'except OSError, e' syntax.
+def make_dir(path):
+ try:
+ os.mkdir(path)
+ except OSError, e:
+ if e.errno != os.errno.EEXIST:
+ raise
+ pass
+
+
+# Directory containing this script (used as the base for all output paths).
+def script_path():
+ return os.path.split(os.path.realpath(__file__))[0]
+
+
+# 4-digit year of g_time; when last_month is truthy, of one month earlier.
+def get_year(last_month):
+ time = g_time
+ if last_month:
+ time += dateutil.relativedelta.relativedelta(months=-1)
+ return time.strftime('%Y')
+
+
+# 2-digit month of g_time; when last_month is truthy, of one month earlier.
+def get_month(last_month):
+ time = g_time
+ if last_month:
+ time += dateutil.relativedelta.relativedelta(months=-1)
+ return time.strftime('%m')
+
+
+# Create (if needed) <script dir>/year=YYYY/month=MM and its data/ subdir.
+# Returns (month_base_path + '/', data_path + '/') — both with trailing '/'.
+def prepare_pricing_month_path(last_month=False):
+ month_base_path = script_path() + '/year=' + get_year(last_month)
+ make_dir(month_base_path)
+ month_base_path += '/month=' + get_month(last_month)
+ make_dir(month_base_path)
+ data_path = month_base_path + '/data'
+ make_dir(data_path)
+ return month_base_path + '/', data_path + '/'
+
+
+# 'YYYY-MM' of g_time (or of the previous month when last_month is truthy).
+def get_year_month(last_month):
+ return get_year(last_month) + '-' + get_month(last_month)
+
+
+# 'YYYY-MM-DD' of g_time.
+def get_date():
+ time = g_time
+ return time.strftime('%Y-%m-%d')
+
+
+# Dates from start (inclusive) to end (exclusive), every `step` days,
+# formatted with `format`.  NOTE: Python 2 xrange.
+def get_date_list(start, end, step=1, format="%Y-%m-%d"):
+ strptime, strftime = datetime.datetime.strptime, datetime.datetime.strftime
+ days = (strptime(end, format) - strptime(start, format)).days
+ return [strftime(strptime(start, format) + datetime.timedelta(i), format) for i in xrange(0, days, step)]
+
+
+# method:
+#   append: append usage records to the existing bill
+#   reload: clear existing data and re-upload the bill
+# Upload a usage/billing file to the internal production-cost service.
+# Logs success/failure; never raises on HTTP errors (best effort).
+# SECURITY NOTE(review): the service token is hardcoded below — consider
+# reading it from the environment or a config file instead.
+def upload_usage_data(method, filename):
+ LOG.info('Start to report %s by %s mode' % (filename, method))
+ report_url = "http://production-cost.api.xiaomi.net/api/v1/data/upload"
+ token = "c2534683e5504ab4850c49873a36de61"
+ url = "%s?sk=%s&method=%s" % (report_url, token, method)
+ with open(filename, "rb") as f:
+ resp = requests.post(url, files={"file": f})
+ if resp.status_code == 200:
+ # HTTP succeeded; the body carries an application-level status code
+ details = resp.json()
+ if details['code'] == 0:
+ LOG.info('Succeed to report %s by %s mode' % (filename, method))
+ else:
+ LOG.fatal('Failed to report %s by %s mode, details: %s' % (filename, method, str(details).decode("unicode-escape")))
+ else:
+ # HTTP-level failure
+ LOG.fatal('Report failed, code %d' % resp.status_code)
+
+
+# Module init: pin KUDU_CONFIG to this script's directory (so the kudu CLI
+# finds the kudurc shipped next to these scripts) and set up logging as soon
+# as this helper module is imported.
+g_script_path = script_path()
+os.environ['KUDU_CONFIG'] = g_script_path
+init_log()
diff --git a/src/kudu/scripts/kudurc b/src/kudu/scripts/kudurc
new file mode 100644
index 0000000..6b28831
--- /dev/null
+++ b/src/kudu/scripts/kudurc
@@ -0,0 +1,69 @@
+# Connection and billing metadata for the known Kudu clusters, keyed by
+# cluster name.  Per-cluster fields:
+#   olap_version     - internal OLAP platform version
+#   region           - datacenter/region identifier
+#   charge_type      - billing mode: share / exclusive / public_share
+#   instance         - storage media class (SSD / HDD)
+#   special_db_org   - optional db-name -> org-id overrides for billing
+#   master_addresses - comma-separated Kudu master RPC endpoints
+clusters_info:
+ c3prc-hadoop:
+ olap_version: 2
+ region: chnbj-idc
+ charge_type: share
+ instance: SSD
+ special_db_org:
+ mifi: CL3894
+ b2c: CL5281
+ master_addresses: c3-hadoop-kudu-prc-ct01.bj:18600,c3-hadoop-kudu-prc-ct02.bj:18600,c3-hadoop-kudu-prc-ct03.bj:18600
+ zjyprc-hadoop:
+ olap_version: 2
+ region: chnbj-idc
+ charge_type: share
+ instance: SSD
+ master_addresses: zjy-hadoop-prc-ct01.bj:14000,zjy-hadoop-prc-ct02.bj:14000,zjy-hadoop-prc-ct03.bj:14000
+ zjyprc-analysis:
+ olap_version: 2
+ region: chnbj-idc
+ charge_type: exclusive
+ instance: SSD
+ special_db_org:
+ kudu_demo: CL18605
+ ga_test: CL18605
+ master_addresses: zjy-hadoop-prc-ct01.bj:15000,zjy-hadoop-prc-ct02.bj:15000,zjy-hadoop-prc-ct03.bj:15000
+ azmbcommonprc-hadoop:
+ olap_version: 2
+ region: indmb-aws
+ charge_type: share
+ instance: SSD
+ master_addresses: mb1-hadoop-kudu-prc-ct01.awsind:14000,mb2-hadoop-kudu-prc-ct02.awsind:14000,mb3-hadoop-kudu-prc-ct03.awsind:14000
+ ksmosprc-xiaomi:
+ olap_version: 2
+ region: rusmos-ks
+ charge_type: share
+ instance: SSD
+ master_addresses: mos1-hadoop-kudu-prc-ct01.ksru:14000,mos1-hadoop-kudu-prc-ct02.ksru:14000,mos1-hadoop-kudu-prc-ct03.ksru:14000
+ alsgprc-xiaomi:
+ olap_version: 2
+ region: sg-ali
+ charge_type: share
+ instance: SSD
+ special_db_org:
+ b2c: CL5281
+ master_addresses: sgp1-hadoop-kudu-prc-ct01.alisgp:15000,sgp2-hadoop-kudu-prc-ct02.alisgp:15000,sgp2-hadoop-kudu-prc-ct03.alisgp:15000
+ tjwqstaging-hdd:
+ olap_version: 2
+ region: chnwq-ks
+ charge_type: public_share
+ instance: SSD
+ master_addresses: tj1-hadoop-kudu-tst-ct01.kscn:18600,tj1-hadoop-kudu-tst-ct02.kscn:18600,tj1-hadoop-kudu-tst-ct03.kscn:18600
+ tjwqtst-dev:
+ olap_version: 2
+ region: chnwq-ks
+ charge_type: public_share
+ instance: SSD
+ master_addresses: tj1-hadoop-kudu-tst-ct01.kscn:15000,tj1-hadoop-kudu-tst-ct02.kscn:15000,tj1-hadoop-kudu-tst-ct03.kscn:15000
+ c3tst-test:
+ olap_version: 2
+ region: chnbj-idc
+ charge_type: public_share
+ instance: HDD
+ master_addresses: c3-hadoop-kudu-prc-ct01.bj:15000,c3-hadoop-kudu-prc-ct02.bj:15000,c3-hadoop-kudu-prc-ct03.bj:15000
+ c3tst-dev:
+ olap_version: 2
+ region: chnbj-idc
+ charge_type: public_share
+ instance: HDD
+ master_addresses: c3-hadoop-kudu-prc-ct01.bj:18000,c3-hadoop-kudu-prc-ct02.bj:18000,c3-hadoop-kudu-prc-ct03.bj:18000
diff --git a/src/kudu/scripts/minos_control_server.py b/src/kudu/scripts/minos_control_server.py
new file mode 100755
index 0000000..1d74878
--- /dev/null
+++ b/src/kudu/scripts/minos_control_server.py
@@ -0,0 +1,225 @@
+#! /usr/bin/env python
+# coding=utf-8
+
+# A tool for restarting servers, typically to restart tservers in kudu cluster
+
+import sys
+import commands
+import time
+import json
+import re
+import os
+import subprocess
+
+# ---- Operator-edited knobs: fill these in before running the script. ----
+cluster = '' # cluster name in minos config
+job = 'tablet_server' # job name in minos config
+operate = 'stop' # minos operate type, currently support: restart, stop, rolling_update
+tasks = range(0, 5) # an int element list, e.g. '[n]' for a single node, or 'range(m, n)' for several nodes
+flags = '' # minos flags, e.g. '--update_config' for updating config
+known_unhealth_nodes = set()
+#known_unhealth_nodes.add() # it's ok to add some known unhealth nodes, e.g. some already stopped servers
+default_follower_unavailable_considered_failed_sec = 300 # default value of follower_unavailable_considered_failed_sec
+rebalance_cluster_after_operation = True # whether to rebalance cluster after operation
+
+def get_minos_type(cluster_name):
+ minos_type = 'null'
+ minos_clinet_path = None
+
+ minos_config_file = os.getenv('MINOS_CONFIG_FILE')
+ minos_clinet_dir = os.getenv('MINOS_CLIENT_DIR')
+ if minos_config_file is not None and minos_clinet_dir is not None:
+ minos_config_dir = os.path.dirname(minos_config_file)
+ minos_config = '%s/xiaomi-config/conf/kudu/kudu-%s.cfg' % (minos_config_dir, cluster_name)
+ if os.path.exists(minos_config) and os.path.exists(minos_clinet_dir + '/deploy'):
+ return 'minos1.0', minos_clinet_dir
+
+ minos2_config_file = os.getenv('MINOS2_CONFIG_FILE')
+ minos2_clinet_dir = os.getenv('MINOS2_CLIENT_DIR')
+ if minos2_config_file is not None and minos2_clinet_dir is not None:
+ minos2_config_dir = os.path.dirname(minos2_config_file)
+ minos2_config = '%s/xiaomi-config/conf/kudu/kudu-%s.yaml' % (minos2_config_dir, cluster_name)
+ if os.path.exists(minos2_config) and os.path.exists(minos2_clinet_dir + '/deploy'):
+ return 'minos2.0', minos2_clinet_dir
+
+ return minos_type, minos_clinet_path
+
+def get_host(host_port):
+ return host_port.split(':')[0]
+
+# Run 'kudu cluster ksck' and return the set of unhealthy node/table names
+# (empty set == healthy).  Exit status 0 and 256 (shell code 1, i.e. ksck
+# found issues but produced valid JSON) are both treated as parseable; any
+# other status yields a 'mockone' sentinel so callers keep waiting.
+# NOTE(review): despite the name, this returns a set, not a bool.
+def is_cluster_health():
+ status, output = commands.getstatusoutput('${KUDU_HOME}/kudu cluster ksck @%s -consensus=false'
+ ' -ksck_format=json_compact -color=never'
+ ' -sections=MASTER_SUMMARIES,TSERVER_SUMMARIES,TABLE_SUMMARIES'
+ ' 2>/dev/null'
+ % cluster)
+ unhealth_nodes = set()
+ if status == 0 or status == 256:
+ ksck_info = json.loads(output)
+ for master in ksck_info['master_summaries']:
+ if master['health'] != 'HEALTHY':
+ unhealth_nodes.add(get_host(master['address']))
+ for tserver in ksck_info['tserver_summaries']:
+ if tserver['health'] != 'HEALTHY':
+ unhealth_nodes.add(get_host(tserver['address']))
+ if 'table_summaries' in ksck_info:
+ for table in ksck_info['table_summaries']:
+ if table['health'] != 'HEALTHY':
+ unhealth_nodes.add(table['name'])
+ else:
+ unhealth_nodes.add('mockone')
+
+ return unhealth_nodes
+
+
+# Interactively confirm a run parameter: prints `message % parameter`, then
+# exits unless the operator answers 'y' (or just presses Enter).  Also exits
+# when the parameter is empty/blank, unless allow_empty is True.
+def check_parameter(message, parameter, allow_empty = False):
+ print(message % parameter)
+ answer = sys.stdin.readline().strip('\n').lower()
+ if answer != 'y' and answer != '':
+ exit()
+ if (not allow_empty and
+ (not parameter or
+ (isinstance(parameter, list) and len(parameter) == 0) or
+ (isinstance(parameter, str) and parameter.strip() == ''))):
+ print(time_header() + 'You should provide a valid parameter')
+ exit()
+
+
+# Block until every unhealthy node reported by is_cluster_health() is in
+# known_unhealth_nodes, polling ksck every 5 seconds and logging the first
+# unexpected unhealthy node each round.
+def wait_cluster_health():
+ print(time_header() + 'Wait cluster to be health ...')
+ nodes = is_cluster_health()
+ health = (len(nodes) == 0)
+ while not health:
+ health = True
+ for node in nodes:
+ if node not in known_unhealth_nodes:
+ health = False
+ print(time_header() + 'Unhealthy node: ' + node)
+ time.sleep(5)
+ nodes = is_cluster_health()
+ break
+
+
+# Extract the hostname from a minos 'deploy' output line such as
+# 'Task 3 of tablet_server on host123(0) ...'; exits the script when the
+# expected pattern is not found.
+def parse_node_from_minos_output(output, job):
+ host = ''
+ regex = re.compile('[a-zA-Z\s]*[tT]ask [0-9]+ of (%s) on ([0-9a-z-.]+)\(0\).+' % job)
+ match = regex.search(output)
+ if match is not None:
+ host = match.group(2)
+ else:
+ print(time_header() + 'Fail to parse node from minos output')
+ exit()
+ return host
+
+
+def time_header():
+ return time.strftime("%Y-%m-%d %H:%M:%S ", time.localtime())
+
+
+# Fetch the cluster's tserver list via 'kudu tserver list -format=json'.
+# Returns the decoded JSON list; an empty dict when the command fails
+# (exit statuses other than 0/256).
+def get_tservers_info():
+ tservers_info = dict()
+ status, output = commands.getstatusoutput('${KUDU_HOME}/kudu tserver list @%s -format=json'
+ % cluster)
+ if status == 0 or status == 256:
+ tservers_info = json.loads(output)
+ return tservers_info
+
+
+def get_tablet_server_info(hostname, tservers_info):
+ rpc_address = ''
+ uuid = ''
+ for tserver in tservers_info:
+ if hostname in tserver['rpc-addresses']:
+ rpc_address = tserver['rpc-addresses']
+ uuid = tserver['uuid']
+ break
+ return rpc_address, uuid
+
+
+# Best-effort: set follower_unavailable_considered_failed_sec on one tserver.
+# The command's status/output are deliberately ignored.
+def set_flag(rpc_address, seconds):
+ cmd = ('${KUDU_HOME}/kudu tserver set_flag %s follower_unavailable_considered_failed_sec %s'
+ % (rpc_address, seconds))
+ status, output = commands.getstatusoutput(cmd)
+
+
+# Run 'kudu cluster rebalance', blacklisting the given tserver uuid (or ''
+# for none) and ignoring the uuids of all known-unhealthy nodes, streaming
+# the tool's output line by line.  Reads module globals known_unhealth_nodes
+# and tservers_info (the latter is assigned by the driver code below before
+# the first call).
+def rebalance_cluster(blacklist_tserver_uuid):
+ ignored_tservers_uuid = set()
+ for node in known_unhealth_nodes:
+ rpc_address, uuid = get_tablet_server_info(node, tservers_info)
+ ignored_tservers_uuid.add(uuid)
+ cmd = ('${KUDU_HOME}/kudu cluster rebalance @%s -blacklist_tservers=%s -ignored_tservers=%s'
+ % (cluster, blacklist_tserver_uuid, str(','.join(ignored_tservers_uuid))))
+ p = subprocess.Popen(cmd, stdout = subprocess.PIPE, shell=True)
+ for line in iter(p.stdout.readline, b''):
+ print line
+ p.stdout.close()
+ p.wait()
+
+
+# ---- Driver: interactively confirm every knob, then operate task by task. ----
+check_parameter('You will operate on cluster: %s? (y/n)', cluster)
+minos_type, minos_client_path = get_minos_type(cluster)
+if minos_type == 'null' or minos_client_path is None:
+ print("You should set these environment variables:\n* MINOS_CONFIG_FILE\n* MINOS_CLIENT_DIR\n" +
+ "* MINOS2_CONFIG_FILE\n* MINOS2_CLIENT_DIR\nand check cluster name")
+ exit()
+check_parameter('The minos type is: %s? (y/n)', minos_type)
+check_parameter('The minos client path is: %s? (y/n)', minos_client_path)
+check_parameter('You will operate on job: %s? (y/n)', job)
+check_parameter('You will operate on tasks: %s? (y/n)', tasks)
+check_parameter('The operate is: %s? (y/n)', operate)
+# rolling_update implies pushing the new package (and, on minos2, confirming
+# its installation).
+if operate == 'rolling_update' and flags.find('--update_package') == -1:
+ flags += ' --update_package'
+ if minos_type == 'minos2.0' and flags.find('--confirm_install') == -1:
+ flags += ' --confirm_install'
+check_parameter('The extra flags are: %s? (y/n)', flags, True)
+check_parameter('The known unhealth nodes are: %s? (y/n)', ','.join(known_unhealth_nodes), True)
+check_parameter('The default value of follower_unavailable_considered_failed_sec is: %s? (y/n)',
+ default_follower_unavailable_considered_failed_sec, True)
+check_parameter('You will rebalance cluster after operation: %s? (y/n)', rebalance_cluster_after_operation, True)
+
+tservers_info = get_tservers_info()
+wait_cluster_health()
+
+# Raise the follower-failure timeout cluster-wide so replicas are not
+# re-replicated while their tserver is briefly down for restart/update.
+if 'tablet_server' in job and operate in ['restart', 'rolling_update']:
+ for tserver in tservers_info:
+ set_flag(tserver['rpc-addresses'], 7200)
+
+for task in tasks:
+ if not isinstance(task, int):
+ print(time_header() + '%s is not a valid integer task id' % str(task))
+ exit()
+
+ if 'tablet_server' in job:
+ cmd = ('%s/deploy show kudu %s --job %s --task %d'
+ % (minos_client_path, cluster, job, task))
+ status, output = commands.getstatusoutput(cmd)
+ print(output)
+ hostname = parse_node_from_minos_output(output, job)
+ rpc_address, uuid = get_tablet_server_info(hostname, tservers_info)
+ if operate == 'stop':
+ # migrate replicas off the tserver before stopping it
+ rebalance_cluster(uuid)
+
+ print(time_header() + 'Start to operate on task %d' % task)
+ cmd = ('%s/deploy %s kudu %s --job %s --task %d --skip_confirm %s'
+ % (minos_client_path, operate, cluster, job, task, flags))
+ status, output = commands.getstatusoutput(cmd)
+ print(output)
+ if operate == 'stop':
+ # a stopped node is expected to stay unhealthy from now on
+ known_unhealth_nodes.add(parse_node_from_minos_output(output, job))
+
+ wait_cluster_health()
+
+ # Re-apply the raised timeout on the restarted tserver (flags set via
+ # set_flag do not survive a process restart).
+ if 'tablet_server' in job and operate in ['restart', 'rolling_update']:
+ set_flag(rpc_address, 7200)
+
+ print(time_header() + '==========================')
+ time.sleep(10)
+
+# Restore the default timeout everywhere once all tasks are done.
+if 'tablet_server' in job and operate in ['restart', 'rolling_update']:
+ for tserver in tservers_info:
+ set_flag(tserver['rpc-addresses'], default_follower_unavailable_considered_failed_sec)
+
+if rebalance_cluster_after_operation:
+ rebalance_cluster('')
+
+# NOTE(review): typo "sucessfully" in the output string left as-is.
+print(time_header() + 'Complete sucessfully')