You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/05/19 13:06:19 UTC
arrow git commit: ARROW-1053: [Python] Remove unnecessary Py_INCREF in PyBuffer causing memory leak
Repository: arrow
Updated Branches:
refs/heads/master 37cdc6e99 -> ff72951f0
ARROW-1053: [Python] Remove unnecessary Py_INCREF in PyBuffer causing memory leak
cc @BryanCutler
Author: Wes McKinney <we...@twosigma.com>
Closes #704 from wesm/ARROW-1053 and squashes the following commits:
2f90337c [Wes McKinney] Remove unnecessary Py_INCREF in PyBuffer causing memory leak
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ff72951f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ff72951f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ff72951f
Branch: refs/heads/master
Commit: ff72951f0262ea230f7cfbb2f4c97ea7cf70845b
Parents: 37cdc6e
Author: Wes McKinney <we...@twosigma.com>
Authored: Fri May 19 09:06:14 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri May 19 09:06:14 2017 -0400
----------------------------------------------------------------------
cpp/src/arrow/python/common.cc | 1 -
python/scripts/test_leak.py | 27 ++++++++++++++++++++++++++-
2 files changed, 26 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/ff72951f/cpp/src/arrow/python/common.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index ba7b6cf..a248db3 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -55,7 +55,6 @@ PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0), obj_(nullptr) {
size_ = buffer->len;
capacity_ = buffer->len;
is_mutable_ = false;
- Py_INCREF(obj_);
}
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/ff72951f/python/scripts/test_leak.py
----------------------------------------------------------------------
diff --git a/python/scripts/test_leak.py b/python/scripts/test_leak.py
index 2b197b6..0b12fb5 100644
--- a/python/scripts/test_leak.py
+++ b/python/scripts/test_leak.py
@@ -21,6 +21,7 @@ import pyarrow as pa
import numpy as np
import memory_profiler
import gc
+import io
def leak():
@@ -32,4 +33,28 @@ def leak():
table.to_pandas()
gc.collect()
-leak()
+# leak()
+
+
+def leak2():
+ data = [pa.array(np.concatenate([np.random.randn(100000)] * 10))]
+ table = pa.Table.from_arrays(data, ['foo'])
+ while True:
+ print('calling to_pandas')
+ print('memory_usage: {0}'.format(memory_profiler.memory_usage()))
+ df = table.to_pandas()
+
+ batch = pa.RecordBatch.from_pandas(df)
+
+ sink = io.BytesIO()
+ writer = pa.RecordBatchFileWriter(sink, batch.schema)
+ writer.write_batch(batch)
+ writer.close()
+
+ buf_reader = pa.BufferReader(sink.getvalue())
+ reader = pa.open_file(buf_reader)
+ reader.read_all()
+
+ gc.collect()
+
+leak2()