You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2020/06/23 13:52:12 UTC
[arrow] branch master updated: ARROW-9201: [Archery] More
user-friendly console output for benchmark diffs, add repetitions argument,
don't build unit tests
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 0d1b2f8 ARROW-9201: [Archery] More user-friendly console output for benchmark diffs, add repetitions argument, don't build unit tests
0d1b2f8 is described below
commit 0d1b2f8e71583d4b2c6fe1d953936351403ef731
Author: Wes McKinney <we...@apache.org>
AuthorDate: Tue Jun 23 08:51:48 2020 -0500
ARROW-9201: [Archery] More user-friendly console output for benchmark diffs, add repetitions argument, don't build unit tests
This uses pandas to generate a sorted text table when using `archery benchmark diff`. Example:
https://github.com/apache/arrow/pull/7506#issuecomment-647633470
There's some other incidental changes
* pandas is required for `archery benchmark diff`. I don't think there's value in reimplementing the stuff that pandas can do in a few lines of code (read JSON, create a sorted table and print it nicely for us).
* The default # of benchmarks repetitions has been changed from 10 to 1 (see ARROW-9155 for context). IMHO more interactive benchmark results is more useful than higher precision. If you need higher precision you can pass `--repetitions=10` on the command line
* `archery benchmark` was building the unit tests unnecessarily. This also occluded a bug ARROW-9209, which is fixed here
Closes #7516 from wesm/ARROW-9201
Authored-by: Wes McKinney <we...@apache.org>
Signed-off-by: Wes McKinney <we...@apache.org>
---
cpp/CMakeLists.txt | 6 ++++-
dev/archery/archery/benchmark/compare.py | 2 ++
dev/archery/archery/benchmark/google.py | 10 ++++----
dev/archery/archery/benchmark/runner.py | 23 ++++++++++++++----
dev/archery/archery/cli.py | 40 +++++++++++++++++++++++++++-----
dev/archery/archery/lang/cpp.py | 2 +-
dev/archery/setup.py | 1 +
7 files changed, 67 insertions(+), 17 deletions(-)
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 8c05d4b..852bc0e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -300,7 +300,11 @@ if(ARROW_BUILD_BENCHMARKS
set(ARROW_JSON ON)
endif()
-if(ARROW_CUDA OR ARROW_FLIGHT OR ARROW_PARQUET OR ARROW_BUILD_TESTS)
+if(ARROW_CUDA
+ OR ARROW_FLIGHT
+ OR ARROW_PARQUET
+ OR ARROW_BUILD_TESTS
+ OR ARROW_BUILD_BENCHMARKS)
set(ARROW_IPC ON)
endif()
diff --git a/dev/archery/archery/benchmark/compare.py b/dev/archery/archery/benchmark/compare.py
index b059712..474f6d4 100644
--- a/dev/archery/archery/benchmark/compare.py
+++ b/dev/archery/archery/benchmark/compare.py
@@ -118,6 +118,7 @@ class BenchmarkComparator:
"contender": fmt(self.contender.value),
"unit": self.unit,
"less_is_better": self.less_is_better,
+ "counters": str(self.baseline.counters)
}
def compare(self, comparator=None):
@@ -129,6 +130,7 @@ class BenchmarkComparator:
"contender": self.contender.value,
"unit": self.unit,
"less_is_better": self.less_is_better,
+ "counters": self.baseline.counters
}
def __call__(self, **kwargs):
diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py
index 5b2176c..3fa7a76 100644
--- a/dev/archery/archery/benchmark/google.py
+++ b/dev/archery/archery/benchmark/google.py
@@ -30,9 +30,6 @@ def partition(pred, iterable):
return list(filter(pred, t1)), list(filterfalse(pred, t2))
-DEFAULT_REPETITIONS = 10
-
-
class GoogleBenchmarkCommand(Command):
""" Run a google benchmark binary.
@@ -52,7 +49,7 @@ class GoogleBenchmarkCommand(Command):
stderr=subprocess.PIPE)
return str.splitlines(result.stdout.decode("utf-8"))
- def results(self, repetitions=DEFAULT_REPETITIONS):
+ def results(self, repetitions=1):
with NamedTemporaryFile() as out:
argv = ["--benchmark_repetitions={}".format(repetitions),
"--benchmark_out={}".format(out.name),
@@ -92,7 +89,7 @@ class GoogleBenchmarkObservation:
"""
def __init__(self, name, real_time, cpu_time, time_unit, size=None,
- bytes_per_second=None, items_per_second=None, **kwargs):
+ bytes_per_second=None, items_per_second=None, **counters):
self._name = name
self.real_time = real_time
self.cpu_time = cpu_time
@@ -100,6 +97,7 @@ class GoogleBenchmarkObservation:
self.size = size
self.bytes_per_second = bytes_per_second
self.items_per_second = items_per_second
+ self.counters = counters
@property
def is_agg(self):
@@ -160,6 +158,8 @@ class GoogleBenchmark(Benchmark):
unit = self.runs[0].unit
less_is_better = not unit.endswith("per_second")
values = [b.value for b in self.runs]
+ # Slight kludge to extract the UserCounters for each benchmark
+ self.counters = self.runs[0].counters
super().__init__(name, unit, less_is_better, values)
def __repr__(self):
diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index e36696b..377e770 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -34,10 +34,15 @@ def regex_filter(re_expr):
return lambda s: re_comp.search(s)
+DEFAULT_REPETITIONS = 1
+
+
class BenchmarkRunner:
- def __init__(self, suite_filter=None, benchmark_filter=None):
+ def __init__(self, suite_filter=None, benchmark_filter=None,
+ repetitions=DEFAULT_REPETITIONS):
self.suite_filter = suite_filter
self.benchmark_filter = benchmark_filter
+ self.repetitions = repetitions
@property
def suites(self):
@@ -137,8 +142,18 @@ class CppBenchmarkRunner(BenchmarkRunner):
def default_configuration(**kwargs):
""" Returns the default benchmark configuration. """
return CppConfiguration(
- build_type="release", with_tests=True, with_benchmarks=True,
- with_compute=True, with_python=False, **kwargs)
+ build_type="release", with_tests=False, with_benchmarks=True,
+ with_compute=True,
+ with_dataset=True,
+ with_parquet=True,
+ with_python=False,
+ with_brotli=True,
+ with_bz2=True,
+ with_lz4=True,
+ with_snappy=True,
+ with_zlib=True,
+ with_zstd=True,
+ **kwargs)
@property
def suites_binaries(self):
@@ -158,7 +173,7 @@ class CppBenchmarkRunner(BenchmarkRunner):
if not benchmark_names:
return None
- results = suite_cmd.results()
+ results = suite_cmd.results(repetitions=self.repetitions)
benchmarks = GoogleBenchmark.from_json(results.get("benchmarks"))
return BenchmarkSuite(name, benchmarks)
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index a5383d9..2622fbf 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -16,6 +16,7 @@
# under the License.
from collections import namedtuple
+from io import StringIO
import click
import errno
import json
@@ -467,6 +468,9 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
@click.option("--threshold", type=float, default=DEFAULT_THRESHOLD,
show_default=True,
help="Regression failure threshold in percentage.")
+@click.option("--repetitions", type=int, default=1, show_default=True,
+ help=("Number of repetitions of each benchmark. Increasing "
+ "may improve result precision."))
@click.argument("contender", metavar="[<contender>",
default=ArrowSources.WORKSPACE, required=False)
@click.argument("baseline", metavar="[<baseline>]]", default="origin/master",
@@ -474,7 +478,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
@click.pass_context
def benchmark_diff(ctx, src, preserve, output, cmake_extras,
suite_filter, benchmark_filter,
- threshold, contender, baseline, **kwargs):
+ repetitions, threshold, contender, baseline, **kwargs):
"""Compare (diff) benchmark runs.
This command acts like git-diff but for benchmark results.
@@ -554,17 +558,41 @@ def benchmark_diff(ctx, src, preserve, output, cmake_extras,
runner_cont = BenchmarkRunner.from_rev_or_path(
src, root, contender, conf,
- suite_filter=suite_filter, benchmark_filter=benchmark_filter)
+ repetitions=repetitions,
+ suite_filter=suite_filter,
+ benchmark_filter=benchmark_filter)
runner_base = BenchmarkRunner.from_rev_or_path(
src, root, baseline, conf,
- suite_filter=suite_filter, benchmark_filter=benchmark_filter)
+ repetitions=repetitions,
+ suite_filter=suite_filter,
+ benchmark_filter=benchmark_filter)
runner_comp = RunnerComparator(runner_cont, runner_base, threshold)
# TODO(kszucs): test that the output is properly formatted jsonlines
- for comparator in runner_comp.comparisons:
- json.dump(comparator, output, cls=JsonEncoder)
- output.write("\n")
+ comparisons_json = _get_comparisons_as_json(runner_comp.comparisons)
+ formatted = _format_comparisons_with_pandas(comparisons_json)
+ output.write(formatted)
+ output.write('\n')
+
+
+def _get_comparisons_as_json(comparisons):
+ buf = StringIO()
+ for comparator in comparisons:
+ json.dump(comparator, buf, cls=JsonEncoder)
+ buf.write("\n")
+
+ return buf.getvalue()
+
+
+def _format_comparisons_with_pandas(comparisons_json):
+ import pandas as pd
+ df = pd.read_json(StringIO(comparisons_json), lines=True)
+ # parse change % so we can sort by it
+ df['change %'] = df.pop('change').str[:-1].map(float)
+ df = df[['benchmark', 'baseline', 'contender', 'change %', 'counters']]
+ df = df.sort_values(by='change %', ascending=False)
+ return df.to_string()
# ----------------------------------------------------------------------
diff --git a/dev/archery/archery/lang/cpp.py b/dev/archery/archery/lang/cpp.py
index 4af0f54..89ecd3f 100644
--- a/dev/archery/archery/lang/cpp.py
+++ b/dev/archery/archery/lang/cpp.py
@@ -52,7 +52,7 @@ class CppConfiguration:
with_compute=None, with_csv=None, with_cuda=None,
with_dataset=None, with_filesystem=None, with_flight=None,
with_gandiva=None, with_hdfs=None, with_hiveserver2=None,
- with_ipc=None, with_json=None, with_jni=None,
+ with_ipc=True, with_json=None, with_jni=None,
with_mimalloc=None,
with_parquet=None, with_plasma=None, with_python=True,
with_r=None, with_s3=None,
diff --git a/dev/archery/setup.py b/dev/archery/setup.py
index 8bcd45d..dd1b555 100755
--- a/dev/archery/setup.py
+++ b/dev/archery/setup.py
@@ -25,6 +25,7 @@ if sys.version_info < (3, 5):
sys.exit('Python < 3.5 is not supported')
extras = {
+ 'benchmark': ['pandas'],
'bot': ['ruamel.yaml', 'pygithub'],
'docker': ['ruamel.yaml', 'python-dotenv']
}