You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/05/19 13:06:19 UTC

arrow git commit: ARROW-1053: [Python] Remove unnecessary Py_INCREF in PyBuffer causing memory leak

Repository: arrow
Updated Branches:
  refs/heads/master 37cdc6e99 -> ff72951f0


ARROW-1053: [Python] Remove unnecessary Py_INCREF in PyBuffer causing memory leak

cc @BryanCutler

Author: Wes McKinney <we...@twosigma.com>

Closes #704 from wesm/ARROW-1053 and squashes the following commits:

2f90337c [Wes McKinney] Remove unnecessary Py_INCREF in PyBuffer causing memory leak


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ff72951f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ff72951f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ff72951f

Branch: refs/heads/master
Commit: ff72951f0262ea230f7cfbb2f4c97ea7cf70845b
Parents: 37cdc6e
Author: Wes McKinney <we...@twosigma.com>
Authored: Fri May 19 09:06:14 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri May 19 09:06:14 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/python/common.cc |  1 -
 python/scripts/test_leak.py    | 27 ++++++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/ff72951f/cpp/src/arrow/python/common.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index ba7b6cf..a248db3 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -55,7 +55,6 @@ PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0), obj_(nullptr) {
     size_ = buffer->len;
     capacity_ = buffer->len;
     is_mutable_ = false;
-    Py_INCREF(obj_);
   }
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ff72951f/python/scripts/test_leak.py
----------------------------------------------------------------------
diff --git a/python/scripts/test_leak.py b/python/scripts/test_leak.py
index 2b197b6..0b12fb5 100644
--- a/python/scripts/test_leak.py
+++ b/python/scripts/test_leak.py
@@ -21,6 +21,7 @@ import pyarrow as pa
 import numpy as np
 import memory_profiler
 import gc
+import io
 
 
 def leak():
@@ -32,4 +33,28 @@ def leak():
         table.to_pandas()
         gc.collect()
 
-leak()
+# leak()
+
+
+def leak2():
+    data = [pa.array(np.concatenate([np.random.randn(100000)] * 10))]
+    table = pa.Table.from_arrays(data, ['foo'])
+    while True:
+        print('calling to_pandas')
+        print('memory_usage: {0}'.format(memory_profiler.memory_usage()))
+        df = table.to_pandas()
+
+        batch = pa.RecordBatch.from_pandas(df)
+
+        sink = io.BytesIO()
+        writer = pa.RecordBatchFileWriter(sink, batch.schema)
+        writer.write_batch(batch)
+        writer.close()
+
+        buf_reader = pa.BufferReader(sink.getvalue())
+        reader = pa.open_file(buf_reader)
+        reader.read_all()
+
+        gc.collect()
+
+leak2()