You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/02/15 23:42:14 UTC
[impala] 01/02: IMPALA-11113 and IMPALA-11114: fixed single_node_perf_run.py for TPCDS
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 182617ee87aaf23abe46dd0cc5b82133e4d41803
Author: Gergely Fürnstáhl <gf...@cloudera.com>
AuthorDate: Wed Feb 9 09:53:24 2022 +0100
IMPALA-11113 and IMPALA-11114: fixed single_node_perf_run.py for TPCDS
Fixed the UTF-8 UnicodeDecodeError which was thrown while dumping and
loading the json file. Now the script ignores non-decodable characters.
Fixed the ZeroDivisionError coming from the t-test when the standard
deviations were 0. "(N/A) Invalid t-test" is shown as the type of such
significant changes, and a hint is printed at the end of the report if any
invalid t-test was detected.
Change-Id: I094763188a1f3ddf40b7140c65acf95918a6597f
Reviewed-on: http://gerrit.cloudera.org:8080/18215
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Quanlong Huang <hu...@gmail.com>
---
bin/run-workload.py | 2 +-
bin/single_node_perf_run.py | 2 +-
tests/benchmark/report_benchmark_results.py | 20 ++++++++++++++++----
3 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/bin/run-workload.py b/bin/run-workload.py
index d406627..8d2d2f7 100755
--- a/bin/run-workload.py
+++ b/bin/run-workload.py
@@ -277,6 +277,6 @@ if __name__ == "__main__":
# Store the results
with open(options.results_json_file, 'w') as f:
- json.dump(result_map, f, cls=CustomJSONEncoder)
+ json.dump(result_map, f, cls=CustomJSONEncoder, ensure_ascii=False)
exit(exit_code)
diff --git a/bin/single_node_perf_run.py b/bin/single_node_perf_run.py
index d215019..ae89abe 100755
--- a/bin/single_node_perf_run.py
+++ b/bin/single_node_perf_run.py
@@ -191,7 +191,7 @@ def generate_profile_file(name, hash, base_dir):
Writes the runtime profiles back in a simple text file in the same directory.
"""
with open(name) as fid:
- data = json.load(fid)
+ data = json.loads(fid.read().decode("utf-8", "ignore"))
with open(os.path.join(base_dir, hash + "_profile.txt"), "w+") as out:
# For each query
for key in data:
diff --git a/tests/benchmark/report_benchmark_results.py b/tests/benchmark/report_benchmark_results.py
index 26bf783..f8a2105 100755
--- a/tests/benchmark/report_benchmark_results.py
+++ b/tests/benchmark/report_benchmark_results.py
@@ -32,6 +32,7 @@ from __future__ import division
import difflib
import json
import logging
+import math
import os
import prettytable
import re
@@ -218,7 +219,7 @@ def get_dict_from_json(filename):
cur[RESULT_LIST].append(query_result)
with open(filename, "r") as f:
- data = json.load(f)
+ data = json.loads(f.read().decode("utf-8", "ignore"))
grouped = defaultdict( lambda: defaultdict(
lambda: defaultdict(lambda: defaultdict(list))))
for workload_name, workload in data.items():
@@ -296,6 +297,7 @@ def calculate_time_stats(grouped):
class Report(object):
significant_perf_change = False
+ invalid_t_tests = False
class FileFormatComparisonRow(object):
"""Represents a row in the overview table, where queries are grouped together and
@@ -386,8 +388,13 @@ class Report(object):
def __check_perf_change_significance(self, stat, ref_stat):
zval = calculate_mwu(stat[SORTED], ref_stat[SORTED])
- tval = calculate_tval(stat[AVG], stat[STDDEV], stat[ITERATIONS],
- ref_stat[AVG], ref_stat[STDDEV], ref_stat[ITERATIONS])
+ try:
+ tval = calculate_tval(stat[AVG], stat[STDDEV], stat[ITERATIONS],
+ ref_stat[AVG], ref_stat[STDDEV], ref_stat[ITERATIONS])
+ except ZeroDivisionError:
+ # t-test cannot be performed if both standard deviations are 0
+ tval = float('nan')
+ Report.invalid_t_tests = True
try:
percent_difference = abs(ref_stat[AVG] - stat[AVG]) * 100 / ref_stat[AVG]
except ZeroDivisionError:
@@ -408,6 +415,7 @@ class Report(object):
"""
perf_change_type = ("(R) Regression" if zval >= 0 and tval >= 0
else "(I) Improvement" if zval <= 0 and tval <= 0
+ else "(N/A) Invalid t-test" if math.isnan(tval)
else "(?) Anomoly")
query = result[RESULT_LIST][0][QUERY]
@@ -588,7 +596,11 @@ class Report(object):
output += variability_analysis_str
if Report.significant_perf_change:
- output += 'Significant perf change detected'
+ output += 'Significant perf change detected.\n'
+
+ if Report.invalid_t_tests:
+ output += 'Invalid t-tests detected. It is not possible to perform t-test with ' \
+ '0 standard deviation. Try increasing the number of iterations.\n'
return output