You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by zh...@apache.org on 2022/08/01 17:22:02 UTC

[incubator-mxnet] branch master updated: [WIP] [BUGFIX] Fix flakey TemporaryDirectory() cleanup on Windows (#21107)

This is an automated email from the ASF dual-hosted git repository.

zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new dedb8c97af [WIP] [BUGFIX] Fix flakey TemporaryDirectory() cleanup on Windows (#21107)
dedb8c97af is described below

commit dedb8c97af040c46bda3890e55472c70f16d237e
Author: Dick Carter <dc...@nvidia.com>
AuthorDate: Mon Aug 1 10:21:45 2022 -0700

    [WIP] [BUGFIX] Fix flakey TemporaryDirectory() cleanup on Windows (#21107)
    
    * Trigger CI to show windows-gpu job failures
    
    * Fix TemporaryDirectory cleanup issues on Windows
    
    * Fix PermissionError handling
    
    * Remove temporary comments
    
    * Fix TemporaryDirectory() to yield dir.name
    
    * Remove temp dirs even with exceptions
---
 python/mxnet/gluon/model_zoo/model_store.py    | 13 +++++--------
 python/mxnet/gluon/utils.py                    |  2 +-
 python/mxnet/util.py                           | 22 ++++++++++++++++++++++
 tests/nightly/test_large_array.py              |  3 ++-
 tests/nightly/test_large_vector.py             |  4 ++--
 tests/python/unittest/common.py                |  2 +-
 tests/python/unittest/test_deferred_compute.py |  4 ++--
 7 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/python/mxnet/gluon/model_zoo/model_store.py b/python/mxnet/gluon/model_zoo/model_store.py
index 0ab11516fa..f742c30fe6 100644
--- a/python/mxnet/gluon/model_zoo/model_store.py
+++ b/python/mxnet/gluon/model_zoo/model_store.py
@@ -21,11 +21,9 @@ __all__ = ['get_model_file', 'purge']
 import os
 import zipfile
 import logging
-import tempfile
 import uuid
-import shutil
 
-from ..utils import download, check_sha1, replace_file
+from ..utils import download, check_sha1, replace_file, TemporaryDirectory
 from ... import base
 
 _model_sha1 = {name: checksum for checksum, name in [
@@ -114,11 +112,10 @@ def get_model_file(name, root=os.path.join(base.data_dir(), 'models')):
     download(_url_format.format(repo_url=repo_url, file_name=file_name),
              path=temp_zip_file_path, overwrite=True)
     with zipfile.ZipFile(temp_zip_file_path) as zf:
-        temp_dir = tempfile.mkdtemp(dir=root)
-        zf.extractall(temp_dir)
-        temp_file_path = os.path.join(temp_dir, file_name+'.params')
-        replace_file(temp_file_path, file_path)
-        shutil.rmtree(temp_dir)
+        with TemporaryDirectory(dir=root) as temp_dir:
+            zf.extractall(temp_dir)
+            temp_file_path = os.path.join(temp_dir, file_name+'.params')
+            replace_file(temp_file_path, file_path)
     os.remove(temp_zip_file_path)
 
     if check_sha1(file_path, sha1_hash):
diff --git a/python/mxnet/gluon/utils.py b/python/mxnet/gluon/utils.py
index 267ed2c793..36ce4b0bfd 100644
--- a/python/mxnet/gluon/utils.py
+++ b/python/mxnet/gluon/utils.py
@@ -34,7 +34,7 @@ import requests
 import numpy as np
 
 from .. import ndarray
-from ..util import is_np_shape, is_np_array
+from ..util import is_np_shape, is_np_array, TemporaryDirectory
 from .. import numpy as _mx_np  # pylint: disable=reimported
 
 
diff --git a/python/mxnet/util.py b/python/mxnet/util.py
index f99dfd0741..3a190d7aa2 100644
--- a/python/mxnet/util.py
+++ b/python/mxnet/util.py
@@ -20,6 +20,9 @@ import ctypes
 import functools
 import inspect
 import threading
+import tempfile
+import platform
+from contextlib import contextmanager
 
 from struct import calcsize
 from .base import (_LIB, check_call, c_str, py_str,
@@ -1359,3 +1362,22 @@ def dtype_from_number(number):
     elif isinstance(number, _np.generic):
         return number.dtype
     raise TypeError('type {} not supported'.format(str(type(number))))
+
+# This is a wrapping of tempfile.TemporaryDirectory(), known to have cleanup issues on Windows.
+# The problem is partially handled as of Python 3.10 by the adding of a 'ignore_cleanup_errors'
+# parameter.  Once MXNet's Python version is forced to be >= 3.10, a simplification of this
+# function to use 'ignore_cleanup_errors' would be possible.  Until the fundamental Windows
+# issues are resolved, best to use this routine instead of tempfile.TemporaryDirectory().
+@contextmanager
+def TemporaryDirectory(*args, **kwargs):
+    """A context wrapper of tempfile.TemporaryDirectory() that ignores cleanup errors on Windows.
+    """
+    dir = tempfile.TemporaryDirectory(*args, **kwargs)
+    try:
+        yield dir.name
+    finally:
+        try:
+            dir.cleanup()
+        except PermissionError:
+            if platform.system() != 'Windows':
+                raise
diff --git a/tests/nightly/test_large_array.py b/tests/nightly/test_large_array.py
index fb1fad544e..391ead3031 100644
--- a/tests/nightly/test_large_array.py
+++ b/tests/nightly/test_large_array.py
@@ -23,6 +23,7 @@ import numpy as np
 import mxnet as mx
 
 from mxnet.test_utils import rand_ndarray, assert_almost_equal, rand_coord_2d, default_device, check_symbolic_forward, create_2d_tensor
+from mxnet.util import TemporaryDirectory
 from mxnet import gluon, nd
 from common import with_seed
 import pytest
@@ -1028,7 +1029,7 @@ def test_tensor():
 
     def check_load_save():
         x = create_2d_tensor(SMALL_Y, LARGE_X)
-        with tempfile.TemporaryDirectory() as tmp:
+        with TemporaryDirectory() as tmp:
             tmpfile = os.path.join(tmp, 'large_tensor')
             nd.save(tmpfile, [x])
             y = nd.load(tmpfile)
diff --git a/tests/nightly/test_large_vector.py b/tests/nightly/test_large_vector.py
index 01d75286b9..dea2e6f3a4 100644
--- a/tests/nightly/test_large_vector.py
+++ b/tests/nightly/test_large_vector.py
@@ -17,12 +17,12 @@
 
 import os
 import sys
-import tempfile
 import math
 import numpy as np
 import mxnet as mx
 
 from mxnet.test_utils import rand_ndarray, assert_almost_equal, rand_coord_2d, create_vector
+from mxnet.util import TemporaryDirectory
 from mxnet import gluon, nd
 from common import with_seed
 import pytest
@@ -374,7 +374,7 @@ def test_tensor():
 
     def check_load_save():
         x = create_vector(size=LARGE_X)
-        with tempfile.TemporaryDirectory() as tmp:
+        with TemporaryDirectory() as tmp:
             tmpfile = os.path.join(tmp, 'large_vector')
             nd.save(tmpfile, [x])
             y = nd.load(tmpfile)
diff --git a/tests/python/unittest/common.py b/tests/python/unittest/common.py
index d55bc74f2c..963abe3bf8 100644
--- a/tests/python/unittest/common.py
+++ b/tests/python/unittest/common.py
@@ -30,7 +30,7 @@ sys.path.insert(0, os.path.join(curr_path, '../../../python'))
 import models
 from contextlib import contextmanager
 import pytest
-from tempfile import TemporaryDirectory
+from mxnet.util import TemporaryDirectory
 import locale
 
 xfail_when_nonstandard_decimal_separator = pytest.mark.xfail(
diff --git a/tests/python/unittest/test_deferred_compute.py b/tests/python/unittest/test_deferred_compute.py
index b9d93a5e6a..3d691515a1 100644
--- a/tests/python/unittest/test_deferred_compute.py
+++ b/tests/python/unittest/test_deferred_compute.py
@@ -17,13 +17,13 @@
 
 import functools
 import operator
-import tempfile
 
 import numpy as np
 
 import mxnet as mx
 import mxnet._deferred_compute as dc
 from mxnet.base import MXNetError
+from mxnet.util import TemporaryDirectory
 import pytest
 
 
@@ -420,7 +420,7 @@ def _assert_dc_gluon(setup, net, setup_is_deterministic=True, numpy=True, autogr
 
     _all_same(ys_np, ys_hybrid_np)
 
-    with tempfile.TemporaryDirectory() as root:
+    with TemporaryDirectory() as root:
         with mx.util.np_shape(True), mx.util.np_array(True):
             net.export(root)