You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2020/08/24 14:54:37 UTC

[GitHub] [incubator-mxnet] kpuatamazon opened a new issue #18998: segfault with numpy related to repr

kpuatamazon opened a new issue #18998:
URL: https://github.com/apache/incubator-mxnet/issues/18998


   ## Description
   This code segfaults for me based on 0de7484884292eb028342b1e5669233792429af0.
   ```
   import mxnet as mx
   from mxnet import np, npx
   from mxnet.test_utils import same, use_np
   from common import with_seed
   import pytest
   
   @use_np
   @with_seed()
   @pytest.mark.parametrize('shape', [(i,) for i in range(1, 4)])
   def test_segfault(shape):
       m = mx.nd.random_uniform(low=-3.0, high=3.0, shape=shape)
       ref = mx.nd.cast(m, dtype='int8')
       test = mx.nd.cast(m, dtype='int8').as_np_ndarray()
       assert test == ref.as_np_ndarray()
   ```
   
   ### Error Message
   ```
   python3 -m pytest fail.py
   =============================================================================================== test session starts ===============================================================================================
   platform linux -- Python 3.6.9, pytest-5.4.2, py-1.8.1, pluggy-0.13.1
   rootdir: /home/ubuntu/mxnet-operator, inifile: pytest.ini
   collected 3 items                                                                                                                                                                                                 
   
   fail.py .FFFatal Python error: Segmentation fault
   
   Current thread 0x00007fb99de58740 (most recent call first):
     File "/home/ubuntu/mxnet-operator/python/mxnet/ndarray/ndarray.py", line 2595 in asnumpy
     File "/home/ubuntu/mxnet-operator/python/mxnet/numpy/multiarray.py", line 1310 in __repr__
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/_io/saferepr.py", line 56 in repr_instance
     File "/usr/lib/python3.6/reprlib.py", line 65 in repr1
     File "/usr/lib/python3.6/reprlib.py", line 55 in repr
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/_io/saferepr.py", line 47 in repr
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/_io/saferepr.py", line 82 in saferepr
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/_code/code.py", line 694 in repr_args
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/_code/code.py", line 785 in repr_traceback_entry
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/_code/code.py", line 824 in repr_traceback
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/_code/code.py", line 880 in repr_excinfo
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/_code/code.py", line 635 in getrepr
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/nodes.py", line 361 in _repr_failure_py
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/reports.py", line 297 in from_item_and_call
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/runner.py", line 260 in pytest_runtest_makereport
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/callers.py", line 187 in _multicall
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/manager.py", line 87 in <lambda>
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/manager.py", line 93 in _hookexec
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/hooks.py", line 286 in __call__
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/runner.py", line 188 in call_and_report
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/runner.py", line 101 in runtestprotocol
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/runner.py", line 85 in pytest_runtest_protocol
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/callers.py", line 187 in _multicall
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/manager.py", line 87 in <lambda>
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/manager.py", line 93 in _hookexec
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/hooks.py", line 286 in __call__
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/main.py", line 272 in pytest_runtestloop
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/callers.py", line 187 in _multicall
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/manager.py", line 87 in <lambda>
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/manager.py", line 93 in _hookexec
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/hooks.py", line 286 in __call__
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/main.py", line 247 in _main
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/main.py", line 191 in wrap_session
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/main.py", line 240 in pytest_cmdline_main
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/callers.py", line 187 in _multicall
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/manager.py", line 87 in <lambda>
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/manager.py", line 93 in _hookexec
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pluggy/hooks.py", line 286 in __call__
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/_pytest/config/__init__.py", line 125 in main
     File "/home/ubuntu/mxnet-operator-venv/lib/python3.6/site-packages/pytest/__main__.py", line 7 in <module>
     File "/usr/lib/python3.6/runpy.py", line 85 in _run_code
     File "/usr/lib/python3.6/runpy.py", line 193 in _run_module_as_main
   
   Fatal Error: Segmentation fault
   Stack trace:
     /lib/x86_64-linux-gnu/libc.so.6 (                                           + 0x3efd0)  [0x7fb99d8a7fd0]
     /lib/x86_64-linux-gnu/libpthread.so.0 ( pthread_mutex_lock                        + 0     )  [0x7fb99d653fa0]
     /home/ubuntu/mxnet-operator/python/mxnet/../../build/libmxnet.so ( mxnet::engine::ThreadedEngine::WaitForVar(mxnet::engine::Var*)  + 0x4a  )  [0x7fb97f837d9a]
     /home/ubuntu/mxnet-operator/python/mxnet/../../build/libmxnet.so ( mxnet::NDArray::SyncCopyToCPU(void*, unsigned long) const  + 0x314 )  [0x7fb97f9f61c4]
     /home/ubuntu/mxnet-operator/python/mxnet/../../build/libmxnet.so ( MXNDArraySyncCopyToCPU                    + 0x2b  )  [0x7fb97f7358db]
     /usr/lib/x86_64-linux-gnu/libffi.so.6 ( ffi_call_unix64                           + 0x4c  )  [0x7fb99b5cedae]
     /usr/lib/x86_64-linux-gnu/libffi.so.6 ( ffi_call                                  + 0x22f )  [0x7fb99b5ce71f]
   Segmentation fault (core dumped)
   ```
   
   ## To Reproduce
   1. Get Ubuntu 18.04 LTS
   2. Compile with `cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_TESTCOVERAGE=OFF -DUSE_MKLDNN=OFF -DUSE_CUDA=OFF -DUSE_TVM_OP=ON -DUSE_MKL_IF_AVAILABLE=ON -DUSE_BLAS=MKL -GNinja ..`, then install, etc.
   3. Create `segfault.py` using above code
   4. `python3 -m pytest segfault.py`
   
   ## What have you tried to solve it?
   
   1. Reducing to an example that wasn't my code
   2. Note if I change `range(1,4)` to `range(1,3)` we get a proper error:
   ```
   =============================================================================================== test session starts ===============================================================================================
   platform linux -- Python 3.6.9, pytest-5.4.2, py-1.8.1, pluggy-0.13.1
   rootdir: /home/ubuntu/mxnet-operator, inifile: pytest.ini
   collected 2 items                                                                                                                                                                                                 
   
   fail.py .F                                                                                                                                                                                                  [100%]
   
   ==================================================================================================== FAILURES =====================================================================================================
   ______________________________________________________________________________________________ test_segfault[shape1] ______________________________________________________________________________________________
   
   shape = (2,)
   
       @use_np
       @with_seed()
       @pytest.mark.parametrize('shape', [(i,) for i in range(1, 3)])
       def test_segfault(shape):
           m = mx.nd.random_uniform(low=-3.0, high=3.0, shape=shape)
           ref = mx.nd.cast(m, dtype='int8')
           test = mx.nd.cast(m, dtype='int8').as_np_ndarray()
   >       assert test == ref.as_np_ndarray()
   
   fail.py:31: 
   _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
   
   self = array([ True,  True])
   
       def __bool__(self):
           num_elements = self.size
           if num_elements == 0:
               warnings.simplefilter('default')
               warnings.warn('The truth value of an empty array is ambiguous. Returning False, but in'
                             ' future this will result in an error.', DeprecationWarning)
               return False
           elif num_elements == 1:
               return bool(self.item())
           else:
   >           raise ValueError("The truth value of an ndarray with multiple elements is ambiguous.")
   E           ValueError: The truth value of an ndarray with multiple elements is ambiguous.
   
   ../../../python/mxnet/numpy/multiarray.py:1174: ValueError
   ---------------------------------------------------------------------------------------------- Captured stderr call -----------------------------------------------------------------------------------------------
   [WARNING] Error seen with seeded test, use MXNET_TEST_SEED=1669351960 to reproduce.
   WARNING:common:Error seen with seeded test, use MXNET_TEST_SEED=1669351960 to reproduce.
   ------------------------------------------------------------------------------------------------ Captured log call ------------------------------------------------------------------------------------------------
   WARNING  common:common.py:230 Error seen with seeded test, use MXNET_TEST_SEED=1669351960 to reproduce.
   ============================================================================================= short test summary info =============================================================================================
   FAILED fail.py::test_segfault[shape1] - ValueError: The truth value of an ndarray with multiple elements is ambiguous.
   =========================================================================================== 1 failed, 1 passed in 0.17s ===========================================================================================
   ```
   
   ## Environment
   
   The script `https://raw.githubusercontent.com/dmlc/gluon-nlp/master/tools/diagnose.py` is returning 404.  


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [incubator-mxnet] leezu commented on issue #18998: segfault with numpy related to repr

Posted by GitBox <gi...@apache.org>.
leezu commented on issue #18998:
URL: https://github.com/apache/incubator-mxnet/issues/18998#issuecomment-679259633


   Both examples yield `ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()`, which is consistent with NumPy. Try cleaning your build directory after updating to https://github.com/apache/incubator-mxnet/commit/0de7484884292eb028342b1e5669233792429af0
   
   Let's reopen if the issue persists. Thanks


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [incubator-mxnet] leezu edited a comment on issue #18998: segfault with numpy related to repr

Posted by GitBox <gi...@apache.org>.
leezu edited a comment on issue #18998:
URL: https://github.com/apache/incubator-mxnet/issues/18998#issuecomment-679259633


   Both examples yield `ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()`, which is consistent with NumPy. Try cleaning your build directory after updating to https://github.com/apache/incubator-mxnet/commit/0de7484884292eb028342b1e5669233792429af0
   
   I also tried with the same compile options that you specified above, and I can't reproduce the segfault with those settings either.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [incubator-mxnet] kpuatamazon closed issue #18998: segfault with numpy related to repr

Posted by GitBox <gi...@apache.org>.
kpuatamazon closed issue #18998:
URL: https://github.com/apache/incubator-mxnet/issues/18998


   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@mxnet.apache.org
For additional commands, e-mail: issues-help@mxnet.apache.org


[GitHub] [incubator-mxnet] leezu commented on issue #18998: segfault with numpy related to repr

Posted by GitBox <gi...@apache.org>.
leezu commented on issue #18998:
URL: https://github.com/apache/incubator-mxnet/issues/18998#issuecomment-679355864


   I still can't reproduce the segfault, though I'm on DLAMI and you are on standard Ubuntu 18.04 LTS. I have previously seen the error you posted (`pthread_mutex_lock`) in cases of ABI issues. Are there any modifications on your system? Did you check whether there is an outdated libmxnet.so in the lib folder?


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [incubator-mxnet] leezu edited a comment on issue #18998: segfault with numpy related to repr

Posted by GitBox <gi...@apache.org>.
leezu edited a comment on issue #18998:
URL: https://github.com/apache/incubator-mxnet/issues/18998#issuecomment-679259633


   Both examples yield `ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()`, which is consistent with NumPy. Try cleaning your build directory after updating to https://github.com/apache/incubator-mxnet/commit/0de7484884292eb028342b1e5669233792429af0
   
   I'll also try with the same compile options that you specified above.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [incubator-mxnet] leezu closed issue #18998: segfault with numpy related to repr

Posted by GitBox <gi...@apache.org>.
leezu closed issue #18998:
URL: https://github.com/apache/incubator-mxnet/issues/18998


   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [incubator-mxnet] kpuatamazon commented on issue #18998: segfault with numpy related to repr

Posted by GitBox <gi...@apache.org>.
kpuatamazon commented on issue #18998:
URL: https://github.com/apache/incubator-mxnet/issues/18998#issuecomment-679197488


   Shorter example.
   ```
   import mxnet as mx
   from mxnet.test_utils import use_np
   import pytest
   
   @use_np
   @pytest.mark.parametrize('shape', [(i,) for i in range(2,4)])
   def test_segfault(shape):
       m = mx.np.zeros(shape)
       assert m == m
   ```


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [incubator-mxnet] kpuatamazon commented on issue #18998: segfault with numpy related to repr

Posted by GitBox <gi...@apache.org>.
kpuatamazon commented on issue #18998:
URL: https://github.com/apache/incubator-mxnet/issues/18998#issuecomment-679327365


   Fresh build with 3c4ac19 still segfaults.  


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [incubator-mxnet] kpuatamazon commented on issue #18998: segfault with numpy related to repr

Posted by GitBox <gi...@apache.org>.
kpuatamazon commented on issue #18998:
URL: https://github.com/apache/incubator-mxnet/issues/18998#issuecomment-683754300


   I've tried and failed to reproduce this on a fresh Ubuntu machine.  Still fails consistently on the original machine, but it's possible something interesting happened to the installation there.  Will come back if I figure something out. 
   
   Backtrace, omitting a bunch of python at the bottom.  
   ```
   #0  __GI___pthread_mutex_lock (mutex=0x20) at ../nptl/pthread_mutex_lock.c:65
   #1  0x00007fffd808407a in mxnet::engine::ThreadedEngine::WaitForVar(mxnet::engine::Var*) () from /home/ubuntu/mxnet-operator/python/mxnet/../../build/libmxnet.so
   #2  0x00007fffd826b751 in mxnet::NDArray::SyncCopyToCPU(void*, unsigned long) const () from /home/ubuntu/mxnet-operator/python/mxnet/../../build/libmxnet.so
   #3  0x00007fffd7f5744b in MXNDArraySyncCopyToCPU () from /home/ubuntu/mxnet-operator/python/mxnet/../../build/libmxnet.so
   #4  0x00007ffff5749dae in ffi_call_unix64 () from /usr/lib/x86_64-linux-gnu/libffi.so.6
   #5  0x00007ffff574971f in ffi_call () from /usr/lib/x86_64-linux-gnu/libffi.so.6
   #6  0x00007ffff595d5c4 in _ctypes_callproc () from /usr/lib/python3.6/lib-dynload/_ctypes.cpython-36m-x86_64-linux-gnu.so
   #7  0x00007ffff595dc33 in ?? () from /usr/lib/python3.6/lib-dynload/_ctypes.cpython-36m-x86_64-linux-gnu.so
   #8  0x00000000005a9eec in _PyObject_FastCallKeywords ()
   #9  0x000000000050a783 in ?? ()
   #10 0x000000000050c1f4 in _PyEval_EvalFrameDefault ()
   #11 0x0000000000509918 in ?? ()
   #12 0x000000000050a64d in ?? ()
   #13 0x000000000050c1f4 in _PyEval_EvalFrameDefault ()
   #14 0x0000000000509015 in _PyFunction_FastCallDict ()
   #15 0x0000000000594b01 in ?? ()
   #16 0x00000000005a4a0c in _PyObject_FastCallDict ()
   #17 0x00000000006195a8 in ?? ()
   #18 0x0000000000564e41 in PyObject_Repr ()
   ```


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@mxnet.apache.org
For additional commands, e-mail: issues-help@mxnet.apache.org