You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/28 18:40:04 UTC

arrow git commit: ARROW-79: [Python] Add benchmarks

Repository: arrow
Updated Branches:
  refs/heads/master ecadd0bcb -> 80ec2c17f


ARROW-79: [Python] Add benchmarks

Run them using `asv run --python=same` or `asv dev`.

Author: Uwe L. Korn <uw...@xhochy.com>

Closes #44 from xhochy/arrow-79 and squashes the following commits:

d3c6401 [Uwe L. Korn] Move benchmarks to toplevel folder
2737f18 [Uwe L. Korn] ARROW-79: [Python] Add benchmarks


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/80ec2c17
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/80ec2c17
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/80ec2c17

Branch: refs/heads/master
Commit: 80ec2c17fccac484993868f951d95362cb75cea9
Parents: ecadd0b
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Mon Mar 28 09:39:55 2016 -0700
Committer: Wes McKinney <we...@apache.org>
Committed: Mon Mar 28 09:39:55 2016 -0700

----------------------------------------------------------------------
 python/.gitignore             |  3 ++
 python/asv.conf.json          | 73 ++++++++++++++++++++++++++++++++++++++
 python/benchmarks/__init__.py | 17 +++++++++
 python/benchmarks/array.py    | 38 ++++++++++++++++++++
 python/doc/Benchmarks.md      | 11 ++++++
 5 files changed, 142 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/80ec2c17/python/.gitignore
----------------------------------------------------------------------
diff --git a/python/.gitignore b/python/.gitignore
index 80103a1..3cb591e 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -35,3 +35,6 @@ dist
 # coverage
 .coverage
 coverage.xml
+
+# benchmark working dir
+.asv

http://git-wip-us.apache.org/repos/asf/arrow/blob/80ec2c17/python/asv.conf.json
----------------------------------------------------------------------
diff --git a/python/asv.conf.json b/python/asv.conf.json
new file mode 100644
index 0000000..96beba6
--- /dev/null
+++ b/python/asv.conf.json
@@ -0,0 +1,73 @@
+{
+    // The version of the config file format.  Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+
+    // The name of the project being benchmarked
+    "project": "pyarrow",
+
+    // The project's homepage
+    "project_url": "https://arrow.apache.org/",
+
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": "https://github.com/apache/arrow/",
+
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "tip" (for mercurial).
+    // "branches": ["master"], // for git
+    // "branches": ["tip"],    // for mercurial
+
+    // The DVCS being used.  If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    "dvcs": "git",
+
+    // The tool to use to create environments.  May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "virtualenv",
+
+    // The base URL to show a commit for the project.
+    "show_commit_url": "https://github.com/apache/arrow/commit/",
+
+    // The Pythons you'd like to test against.  If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    // "pythons": ["2.7", "3.3"],
+
+    // The matrix of dependencies to test.  Each key is the name of a
+    // package (in PyPI) and the values are version numbers.  An empty
+    // list indicates to just test against the default (latest)
+    // version.
+    // "matrix": {
+    //     "numpy": ["1.6", "1.7"]
+    // },
+
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in.  If not provided, defaults to "benchmarks"
+    "benchmark_dir": "benchmarks",
+
+    // The directory (relative to the current directory) to cache the Python
+    // environments in.  If not provided, defaults to "env"
+    "env_dir": ".asv/env",
+
+
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in.  If not provided, defaults to "results".
+    "results_dir": ".asv/results",
+
+    // The directory (relative to the current directory) that the html tree
+    // should be written to.  If not provided, defaults to "html".
+    "html_dir": "build/benchmarks/html",
+
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+
+    // `asv` will cache wheels of the recent builds in each
+    // environment, making them faster to install next time.  This is the
+    // number of builds to keep, per environment.
+    // "wheel_cache_size": 0
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/80ec2c17/python/benchmarks/__init__.py
----------------------------------------------------------------------
diff --git a/python/benchmarks/__init__.py b/python/benchmarks/__init__.py
new file mode 100644
index 0000000..2456923
--- /dev/null
+++ b/python/benchmarks/__init__.py
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+

http://git-wip-us.apache.org/repos/asf/arrow/blob/80ec2c17/python/benchmarks/array.py
----------------------------------------------------------------------
diff --git a/python/benchmarks/array.py b/python/benchmarks/array.py
new file mode 100644
index 0000000..6ab73d1
--- /dev/null
+++ b/python/benchmarks/array.py
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pyarrow
+
+class Conversions(object):
+    params = (1, 10 ** 5, 10 ** 6, 10 ** 7)
+
+    def time_from_pylist(self, n):
+        pyarrow.from_pylist(list(range(n)))
+
+    def peakmem_from_pylist(self, n):
+        pyarrow.from_pylist(list(range(n)))
+
+class ScalarAccess(object):
+    params = (1, 10 ** 5, 10 ** 6, 10 ** 7)
+
+    def setUp(self, n):
+        self._array = pyarrow.from_pylist(list(range(n)))
+
+    def time_as_py(self, n):
+        for i in range(n):
+            self._array[i].as_py()
+

http://git-wip-us.apache.org/repos/asf/arrow/blob/80ec2c17/python/doc/Benchmarks.md
----------------------------------------------------------------------
diff --git a/python/doc/Benchmarks.md b/python/doc/Benchmarks.md
new file mode 100644
index 0000000..8edfb62
--- /dev/null
+++ b/python/doc/Benchmarks.md
@@ -0,0 +1,11 @@
+## Benchmark Requirements
+
+The benchmarks are run using [asv][1], which is also their only requirement.
+
+## Running the benchmarks
+
+To run the benchmarks, call `asv run --python=same`. You cannot use the
+plain `asv run` command at the moment, because asv cannot handle Python
+packages in subdirectories of a repository.
+
+[1]: https://asv.readthedocs.org/