You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by zh...@apache.org on 2022/08/01 17:22:02 UTC
[incubator-mxnet] branch master updated: [WIP] [BUGFIX] Fix flakey TemporaryDirectory() cleanup on Windows (#21107)
This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new dedb8c97af [WIP] [BUGFIX] Fix flakey TemporaryDirectory() cleanup on Windows (#21107)
dedb8c97af is described below
commit dedb8c97af040c46bda3890e55472c70f16d237e
Author: Dick Carter <dc...@nvidia.com>
AuthorDate: Mon Aug 1 10:21:45 2022 -0700
[WIP] [BUGFIX] Fix flakey TemporaryDirectory() cleanup on Windows (#21107)
* Trigger CI to show windows-gpu job failures
* Fix TemporaryDirectory cleanup issues on Windows
* Fix PermissionError handling
* Remove temporary comments
* Fix TemporaryDirectory() to yield dir.name
* Remove temp dirs even with exceptions
---
python/mxnet/gluon/model_zoo/model_store.py | 13 +++++--------
python/mxnet/gluon/utils.py | 2 +-
python/mxnet/util.py | 22 ++++++++++++++++++++++
tests/nightly/test_large_array.py | 3 ++-
tests/nightly/test_large_vector.py | 4 ++--
tests/python/unittest/common.py | 2 +-
tests/python/unittest/test_deferred_compute.py | 4 ++--
7 files changed, 35 insertions(+), 15 deletions(-)
diff --git a/python/mxnet/gluon/model_zoo/model_store.py b/python/mxnet/gluon/model_zoo/model_store.py
index 0ab11516fa..f742c30fe6 100644
--- a/python/mxnet/gluon/model_zoo/model_store.py
+++ b/python/mxnet/gluon/model_zoo/model_store.py
@@ -21,11 +21,9 @@ __all__ = ['get_model_file', 'purge']
import os
import zipfile
import logging
-import tempfile
import uuid
-import shutil
-from ..utils import download, check_sha1, replace_file
+from ..utils import download, check_sha1, replace_file, TemporaryDirectory
from ... import base
_model_sha1 = {name: checksum for checksum, name in [
@@ -114,11 +112,10 @@ def get_model_file(name, root=os.path.join(base.data_dir(), 'models')):
download(_url_format.format(repo_url=repo_url, file_name=file_name),
path=temp_zip_file_path, overwrite=True)
with zipfile.ZipFile(temp_zip_file_path) as zf:
- temp_dir = tempfile.mkdtemp(dir=root)
- zf.extractall(temp_dir)
- temp_file_path = os.path.join(temp_dir, file_name+'.params')
- replace_file(temp_file_path, file_path)
- shutil.rmtree(temp_dir)
+ with TemporaryDirectory(dir=root) as temp_dir:
+ zf.extractall(temp_dir)
+ temp_file_path = os.path.join(temp_dir, file_name+'.params')
+ replace_file(temp_file_path, file_path)
os.remove(temp_zip_file_path)
if check_sha1(file_path, sha1_hash):
diff --git a/python/mxnet/gluon/utils.py b/python/mxnet/gluon/utils.py
index 267ed2c793..36ce4b0bfd 100644
--- a/python/mxnet/gluon/utils.py
+++ b/python/mxnet/gluon/utils.py
@@ -34,7 +34,7 @@ import requests
import numpy as np
from .. import ndarray
-from ..util import is_np_shape, is_np_array
+from ..util import is_np_shape, is_np_array, TemporaryDirectory
from .. import numpy as _mx_np # pylint: disable=reimported
diff --git a/python/mxnet/util.py b/python/mxnet/util.py
index f99dfd0741..3a190d7aa2 100644
--- a/python/mxnet/util.py
+++ b/python/mxnet/util.py
@@ -20,6 +20,9 @@ import ctypes
import functools
import inspect
import threading
+import tempfile
+import platform
+from contextlib import contextmanager
from struct import calcsize
from .base import (_LIB, check_call, c_str, py_str,
@@ -1359,3 +1362,22 @@ def dtype_from_number(number):
elif isinstance(number, _np.generic):
return number.dtype
raise TypeError('type {} not supported'.format(str(type(number))))
+
+# This is a wrapping of tempfile.TemporaryDirectory(), known to have cleanup issues on Windows.
+# The problem is partially handled as of Python 3.10 by the adding of a 'ignore_cleanup_errors'
+# parameter. Once MXNet's Python version is forced to be >= 3.10, a simplification of this
+# function to use 'ignore_cleanup_errors' would be possible. Until the fundamental Windows
+# issues are resolved, best to use this routine instead of tempfile.TemporaryDirectory().
+@contextmanager
+def TemporaryDirectory(*args, **kwargs):
+ """A context wrapper of tempfile.TemporaryDirectory() that ignores cleanup errors on Windows.
+ """
+ dir = tempfile.TemporaryDirectory(*args, **kwargs)
+ try:
+ yield dir.name
+ finally:
+ try:
+ dir.cleanup()
+ except PermissionError:
+ if platform.system() != 'Windows':
+ raise
diff --git a/tests/nightly/test_large_array.py b/tests/nightly/test_large_array.py
index fb1fad544e..391ead3031 100644
--- a/tests/nightly/test_large_array.py
+++ b/tests/nightly/test_large_array.py
@@ -23,6 +23,7 @@ import numpy as np
import mxnet as mx
from mxnet.test_utils import rand_ndarray, assert_almost_equal, rand_coord_2d, default_device, check_symbolic_forward, create_2d_tensor
+from mxnet.util import TemporaryDirectory
from mxnet import gluon, nd
from common import with_seed
import pytest
@@ -1028,7 +1029,7 @@ def test_tensor():
def check_load_save():
x = create_2d_tensor(SMALL_Y, LARGE_X)
- with tempfile.TemporaryDirectory() as tmp:
+ with TemporaryDirectory() as tmp:
tmpfile = os.path.join(tmp, 'large_tensor')
nd.save(tmpfile, [x])
y = nd.load(tmpfile)
diff --git a/tests/nightly/test_large_vector.py b/tests/nightly/test_large_vector.py
index 01d75286b9..dea2e6f3a4 100644
--- a/tests/nightly/test_large_vector.py
+++ b/tests/nightly/test_large_vector.py
@@ -17,12 +17,12 @@
import os
import sys
-import tempfile
import math
import numpy as np
import mxnet as mx
from mxnet.test_utils import rand_ndarray, assert_almost_equal, rand_coord_2d, create_vector
+from mxnet.util import TemporaryDirectory
from mxnet import gluon, nd
from common import with_seed
import pytest
@@ -374,7 +374,7 @@ def test_tensor():
def check_load_save():
x = create_vector(size=LARGE_X)
- with tempfile.TemporaryDirectory() as tmp:
+ with TemporaryDirectory() as tmp:
tmpfile = os.path.join(tmp, 'large_vector')
nd.save(tmpfile, [x])
y = nd.load(tmpfile)
diff --git a/tests/python/unittest/common.py b/tests/python/unittest/common.py
index d55bc74f2c..963abe3bf8 100644
--- a/tests/python/unittest/common.py
+++ b/tests/python/unittest/common.py
@@ -30,7 +30,7 @@ sys.path.insert(0, os.path.join(curr_path, '../../../python'))
import models
from contextlib import contextmanager
import pytest
-from tempfile import TemporaryDirectory
+from mxnet.util import TemporaryDirectory
import locale
xfail_when_nonstandard_decimal_separator = pytest.mark.xfail(
diff --git a/tests/python/unittest/test_deferred_compute.py b/tests/python/unittest/test_deferred_compute.py
index b9d93a5e6a..3d691515a1 100644
--- a/tests/python/unittest/test_deferred_compute.py
+++ b/tests/python/unittest/test_deferred_compute.py
@@ -17,13 +17,13 @@
import functools
import operator
-import tempfile
import numpy as np
import mxnet as mx
import mxnet._deferred_compute as dc
from mxnet.base import MXNetError
+from mxnet.util import TemporaryDirectory
import pytest
@@ -420,7 +420,7 @@ def _assert_dc_gluon(setup, net, setup_is_deterministic=True, numpy=True, autogr
_all_same(ys_np, ys_hybrid_np)
- with tempfile.TemporaryDirectory() as root:
+ with TemporaryDirectory() as root:
with mx.util.np_shape(True), mx.util.np_array(True):
net.export(root)