You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2020/06/23 13:52:12 UTC

[arrow] branch master updated: ARROW-9201: [Archery] More user-friendly console output for benchmark diffs, add repetitions argument, don't build unit tests

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 0d1b2f8  ARROW-9201: [Archery] More user-friendly console output for benchmark diffs, add repetitions argument, don't build unit tests
0d1b2f8 is described below

commit 0d1b2f8e71583d4b2c6fe1d953936351403ef731
Author: Wes McKinney <we...@apache.org>
AuthorDate: Tue Jun 23 08:51:48 2020 -0500

    ARROW-9201: [Archery] More user-friendly console output for benchmark diffs, add repetitions argument, don't build unit tests
    
    This uses pandas to generate a sorted text table when using `archery benchmark diff`. Example:
    
    https://github.com/apache/arrow/pull/7506#issuecomment-647633470
    
    There's some other incidental changes
    
    * pandas is required for `archery benchmark diff`. I don't think there's value in reimplementing the stuff that pandas can do in a few lines of code (read JSON, create a sorted table and print it nicely for us).
    * The default # of benchmarks repetitions has been changed from 10 to 1 (see ARROW-9155 for context). IMHO more interactive benchmark results is more useful than higher precision. If you need higher precision you can pass `--repetitions=10` on the command line
    * `archery benchmark` was building the unit tests unnecessarily. This also occluded a bug ARROW-9209, which is fixed here
    
    Closes #7516 from wesm/ARROW-9201
    
    Authored-by: Wes McKinney <we...@apache.org>
    Signed-off-by: Wes McKinney <we...@apache.org>
---
 cpp/CMakeLists.txt                       |  6 ++++-
 dev/archery/archery/benchmark/compare.py |  2 ++
 dev/archery/archery/benchmark/google.py  | 10 ++++----
 dev/archery/archery/benchmark/runner.py  | 23 ++++++++++++++----
 dev/archery/archery/cli.py               | 40 +++++++++++++++++++++++++++-----
 dev/archery/archery/lang/cpp.py          |  2 +-
 dev/archery/setup.py                     |  1 +
 7 files changed, 67 insertions(+), 17 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 8c05d4b..852bc0e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -300,7 +300,11 @@ if(ARROW_BUILD_BENCHMARKS
   set(ARROW_JSON ON)
 endif()
 
-if(ARROW_CUDA OR ARROW_FLIGHT OR ARROW_PARQUET OR ARROW_BUILD_TESTS)
+if(ARROW_CUDA
+   OR ARROW_FLIGHT
+   OR ARROW_PARQUET
+   OR ARROW_BUILD_TESTS
+   OR ARROW_BUILD_BENCHMARKS)
   set(ARROW_IPC ON)
 endif()
 
diff --git a/dev/archery/archery/benchmark/compare.py b/dev/archery/archery/benchmark/compare.py
index b059712..474f6d4 100644
--- a/dev/archery/archery/benchmark/compare.py
+++ b/dev/archery/archery/benchmark/compare.py
@@ -118,6 +118,7 @@ class BenchmarkComparator:
             "contender": fmt(self.contender.value),
             "unit": self.unit,
             "less_is_better": self.less_is_better,
+            "counters": str(self.baseline.counters)
         }
 
     def compare(self, comparator=None):
@@ -129,6 +130,7 @@ class BenchmarkComparator:
             "contender": self.contender.value,
             "unit": self.unit,
             "less_is_better": self.less_is_better,
+            "counters": self.baseline.counters
         }
 
     def __call__(self, **kwargs):
diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py
index 5b2176c..3fa7a76 100644
--- a/dev/archery/archery/benchmark/google.py
+++ b/dev/archery/archery/benchmark/google.py
@@ -30,9 +30,6 @@ def partition(pred, iterable):
     return list(filter(pred, t1)), list(filterfalse(pred, t2))
 
 
-DEFAULT_REPETITIONS = 10
-
-
 class GoogleBenchmarkCommand(Command):
     """ Run a google benchmark binary.
 
@@ -52,7 +49,7 @@ class GoogleBenchmarkCommand(Command):
                           stderr=subprocess.PIPE)
         return str.splitlines(result.stdout.decode("utf-8"))
 
-    def results(self, repetitions=DEFAULT_REPETITIONS):
+    def results(self, repetitions=1):
         with NamedTemporaryFile() as out:
             argv = ["--benchmark_repetitions={}".format(repetitions),
                     "--benchmark_out={}".format(out.name),
@@ -92,7 +89,7 @@ class GoogleBenchmarkObservation:
     """
 
     def __init__(self, name, real_time, cpu_time, time_unit, size=None,
-                 bytes_per_second=None, items_per_second=None, **kwargs):
+                 bytes_per_second=None, items_per_second=None, **counters):
         self._name = name
         self.real_time = real_time
         self.cpu_time = cpu_time
@@ -100,6 +97,7 @@ class GoogleBenchmarkObservation:
         self.size = size
         self.bytes_per_second = bytes_per_second
         self.items_per_second = items_per_second
+        self.counters = counters
 
     @property
     def is_agg(self):
@@ -160,6 +158,8 @@ class GoogleBenchmark(Benchmark):
         unit = self.runs[0].unit
         less_is_better = not unit.endswith("per_second")
         values = [b.value for b in self.runs]
+        # Slight kludge to extract the UserCounters for each benchmark
+        self.counters = self.runs[0].counters
         super().__init__(name, unit, less_is_better, values)
 
     def __repr__(self):
diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index e36696b..377e770 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -34,10 +34,15 @@ def regex_filter(re_expr):
     return lambda s: re_comp.search(s)
 
 
+DEFAULT_REPETITIONS = 1
+
+
 class BenchmarkRunner:
-    def __init__(self, suite_filter=None, benchmark_filter=None):
+    def __init__(self, suite_filter=None, benchmark_filter=None,
+                 repetitions=DEFAULT_REPETITIONS):
         self.suite_filter = suite_filter
         self.benchmark_filter = benchmark_filter
+        self.repetitions = repetitions
 
     @property
     def suites(self):
@@ -137,8 +142,18 @@ class CppBenchmarkRunner(BenchmarkRunner):
     def default_configuration(**kwargs):
         """ Returns the default benchmark configuration. """
         return CppConfiguration(
-            build_type="release", with_tests=True, with_benchmarks=True,
-            with_compute=True, with_python=False, **kwargs)
+            build_type="release", with_tests=False, with_benchmarks=True,
+            with_compute=True,
+            with_dataset=True,
+            with_parquet=True,
+            with_python=False,
+            with_brotli=True,
+            with_bz2=True,
+            with_lz4=True,
+            with_snappy=True,
+            with_zlib=True,
+            with_zstd=True,
+            **kwargs)
 
     @property
     def suites_binaries(self):
@@ -158,7 +173,7 @@ class CppBenchmarkRunner(BenchmarkRunner):
         if not benchmark_names:
             return None
 
-        results = suite_cmd.results()
+        results = suite_cmd.results(repetitions=self.repetitions)
         benchmarks = GoogleBenchmark.from_json(results.get("benchmarks"))
         return BenchmarkSuite(name, benchmarks)
 
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index a5383d9..2622fbf 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -16,6 +16,7 @@
 # under the License.
 
 from collections import namedtuple
+from io import StringIO
 import click
 import errno
 import json
@@ -467,6 +468,9 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
 @click.option("--threshold", type=float, default=DEFAULT_THRESHOLD,
               show_default=True,
               help="Regression failure threshold in percentage.")
+@click.option("--repetitions", type=int, default=1, show_default=True,
+              help=("Number of repetitions of each benchmark. Increasing "
+                    "may improve result precision."))
 @click.argument("contender", metavar="[<contender>",
                 default=ArrowSources.WORKSPACE, required=False)
 @click.argument("baseline", metavar="[<baseline>]]", default="origin/master",
@@ -474,7 +478,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
 @click.pass_context
 def benchmark_diff(ctx, src, preserve, output, cmake_extras,
                    suite_filter, benchmark_filter,
-                   threshold, contender, baseline, **kwargs):
+                   repetitions, threshold, contender, baseline, **kwargs):
     """Compare (diff) benchmark runs.
 
     This command acts like git-diff but for benchmark results.
@@ -554,17 +558,41 @@ def benchmark_diff(ctx, src, preserve, output, cmake_extras,
 
         runner_cont = BenchmarkRunner.from_rev_or_path(
             src, root, contender, conf,
-            suite_filter=suite_filter, benchmark_filter=benchmark_filter)
+            repetitions=repetitions,
+            suite_filter=suite_filter,
+            benchmark_filter=benchmark_filter)
         runner_base = BenchmarkRunner.from_rev_or_path(
             src, root, baseline, conf,
-            suite_filter=suite_filter, benchmark_filter=benchmark_filter)
+            repetitions=repetitions,
+            suite_filter=suite_filter,
+            benchmark_filter=benchmark_filter)
 
         runner_comp = RunnerComparator(runner_cont, runner_base, threshold)
 
         # TODO(kszucs): test that the output is properly formatted jsonlines
-        for comparator in runner_comp.comparisons:
-            json.dump(comparator, output, cls=JsonEncoder)
-            output.write("\n")
+        comparisons_json = _get_comparisons_as_json(runner_comp.comparisons)
+        formatted = _format_comparisons_with_pandas(comparisons_json)
+        output.write(formatted)
+        output.write('\n')
+
+
+def _get_comparisons_as_json(comparisons):
+    buf = StringIO()
+    for comparator in comparisons:
+        json.dump(comparator, buf, cls=JsonEncoder)
+        buf.write("\n")
+
+    return buf.getvalue()
+
+
+def _format_comparisons_with_pandas(comparisons_json):
+    import pandas as pd
+    df = pd.read_json(StringIO(comparisons_json), lines=True)
+    # parse change % so we can sort by it
+    df['change %'] = df.pop('change').str[:-1].map(float)
+    df = df[['benchmark', 'baseline', 'contender', 'change %', 'counters']]
+    df = df.sort_values(by='change %', ascending=False)
+    return df.to_string()
 
 
 # ----------------------------------------------------------------------
diff --git a/dev/archery/archery/lang/cpp.py b/dev/archery/archery/lang/cpp.py
index 4af0f54..89ecd3f 100644
--- a/dev/archery/archery/lang/cpp.py
+++ b/dev/archery/archery/lang/cpp.py
@@ -52,7 +52,7 @@ class CppConfiguration:
                  with_compute=None, with_csv=None, with_cuda=None,
                  with_dataset=None, with_filesystem=None, with_flight=None,
                  with_gandiva=None, with_hdfs=None, with_hiveserver2=None,
-                 with_ipc=None, with_json=None, with_jni=None,
+                 with_ipc=True, with_json=None, with_jni=None,
                  with_mimalloc=None,
                  with_parquet=None, with_plasma=None, with_python=True,
                  with_r=None, with_s3=None,
diff --git a/dev/archery/setup.py b/dev/archery/setup.py
index 8bcd45d..dd1b555 100755
--- a/dev/archery/setup.py
+++ b/dev/archery/setup.py
@@ -25,6 +25,7 @@ if sys.version_info < (3, 5):
     sys.exit('Python < 3.5 is not supported')
 
 extras = {
+    'benchmark': ['pandas'],
     'bot': ['ruamel.yaml', 'pygithub'],
     'docker': ['ruamel.yaml', 'python-dotenv']
 }