You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/04/01 02:28:08 UTC
arrow git commit: ARROW-89: [Python] Add benchmarks for Arrow<->Pandas conversion
Repository: arrow
Updated Branches:
refs/heads/master 5a68f8d73 -> b3ebce1b3
ARROW-89: [Python] Add benchmarks for Arrow<->Pandas conversion
Author: Uwe L. Korn <uw...@xhochy.com>
Closes #51 from xhochy/arrow-89 and squashes the following commits:
bd6a7cb [Uwe L. Korn] Split benchmarks and add one for a float64 column with NaNs
8f74528 [Uwe L. Korn] ARROW-89: [Python] Add benchmarks for Arrow<->Pandas conversion
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/b3ebce1b
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/b3ebce1b
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/b3ebce1b
Branch: refs/heads/master
Commit: b3ebce1b3471abbdc4516ff86014aa26bcc99a24
Parents: 5a68f8d
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Thu Mar 31 17:27:56 2016 -0700
Committer: Wes McKinney <we...@apache.org>
Committed: Thu Mar 31 17:27:56 2016 -0700
----------------------------------------------------------------------
python/benchmarks/array.py | 55 +++++++++++++++++++++++++++++++++++++----
1 file changed, 50 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/b3ebce1b/python/benchmarks/array.py
----------------------------------------------------------------------
diff --git a/python/benchmarks/array.py b/python/benchmarks/array.py
index 6ab73d1..4268f00 100644
--- a/python/benchmarks/array.py
+++ b/python/benchmarks/array.py
@@ -15,22 +15,67 @@
# specific language governing permissions and limitations
# under the License.
-import pyarrow
+import numpy as np
+import pandas as pd
+import pyarrow as A
-class Conversions(object):
+
+class PyListConversions(object):
+ param_names = ('size',)
params = (1, 10 ** 5, 10 ** 6, 10 ** 7)
+ def setup(self, n):
+ self.data = list(range(n))
+
def time_from_pylist(self, n):
- pyarrow.from_pylist(list(range(n)))
+ A.from_pylist(self.data)
def peakmem_from_pylist(self, n):
- pyarrow.from_pylist(list(range(n)))
+ A.from_pylist(self.data)
+
+
+class PandasConversionsBase(object):
+ def setup(self, n, dtype):
+ if dtype == 'float64_nans':
+ arr = np.arange(n).astype('float64')
+ arr[arr % 10 == 0] = np.nan
+ else:
+ arr = np.arange(n).astype(dtype)
+ self.data = pd.DataFrame({'column': arr})
+
+
+class PandasConversionsToArrow(PandasConversionsBase):
+ param_names = ('size', 'dtype')
+ params = ((1, 10 ** 5, 10 ** 6, 10 ** 7), ('int64', 'float64', 'float64_nans', 'str'))
+
+ def time_from_series(self, n, dtype):
+ A.from_pandas_dataframe(self.data)
+
+ def peakmem_from_series(self, n, dtype):
+ A.from_pandas_dataframe(self.data)
+
+
+class PandasConversionsFromArrow(PandasConversionsBase):
+ param_names = ('size', 'dtype')
+ params = ((1, 10 ** 5, 10 ** 6, 10 ** 7), ('int64', 'float64', 'float64_nans', 'str'))
+
+ def setup(self, n, dtype):
+ super(PandasConversionsFromArrow, self).setup(n, dtype)
+ self.arrow_data = A.from_pandas_dataframe(self.data)
+
+ def time_to_series(self, n, dtype):
+ self.arrow_data.to_pandas()
+
+ def peakmem_to_series(self, n, dtype):
+ self.arrow_data.to_pandas()
+
class ScalarAccess(object):
+ param_names = ('size',)
params = (1, 10 ** 5, 10 ** 6, 10 ** 7)
def setUp(self, n):
- self._array = pyarrow.from_pylist(list(range(n)))
+ self._array = A.from_pylist(list(range(n)))
def time_as_py(self, n):
for i in range(n):