You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2019/06/26 17:20:44 UTC

[GitHub] [incubator-mxnet] lebeg opened a new issue #15374: NightlyTestsForBinaries tutorials test broken

lebeg opened a new issue #15374: NightlyTestsForBinaries tutorials test broken
URL: https://github.com/apache/incubator-mxnet/issues/15374
 
 
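   The nightly tutorials test is broken; in particular, test_tutorials.test_amp fails. While executing the amp/amp_tutorial notebook, the SSDMultiBoxLoss call in gluoncv/loss.py raises an MXNetError from a dtype mismatch in the hard-negative comparison ("expected int32, got float32"). See the log below.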
   
   Failing Jenkins run: http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/NightlyTestsForBinaries/detail/master/355/pipeline
   
   ```
   ERROR:root:An error occurred while executing the following cell:
   ------------------
   mbox_loss = gcv.loss.SSDMultiBoxLoss()
   
   for epoch in range(1):
       ce_metric.reset()
       smoothl1_metric.reset()
       tic = time.time()
       btic = time.time()
   
       for i, batch in enumerate(train_data):
           batch_size = batch[0].shape[0]
           data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
           cls_targets = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
           box_targets = gluon.utils.split_and_load(batch[2], ctx_list=ctx, batch_axis=0)
           with autograd.record():
               cls_preds = []
               box_preds = []
               for x in data:
                   cls_pred, box_pred, _ = net(x)
                   cls_preds.append(cls_pred)
                   box_preds.append(box_pred)
               sum_loss, cls_loss, box_loss = mbox_loss(
                   cls_preds, box_preds, cls_targets, box_targets)
               autograd.backward(sum_loss)
           trainer.step(1)
           ce_metric.update(0, [l * batch_size for l in cls_loss])
           smoothl1_metric.update(0, [l * batch_size for l in box_loss])
           if not (i + 1) % 50:
               name1, loss1 = ce_metric.get()
               name2, loss2 = smoothl1_metric.get()
               logger.info('[Epoch {}][Batch {}], Speed: {:.3f} samples/sec, {}={:.3f}, {}={:.3f}'.format(
                   epoch, i, batch_size/(time.time()-btic), name1, loss1, name2, loss2))
           btic = time.time()
   ------------------
   
   ---------------------------------------------------------------------------
   MXNetError                                Traceback (most recent call last)
   <ipython-input-6-67de7c0b11cc> in <module>
        20                 box_preds.append(box_pred)
        21             sum_loss, cls_loss, box_loss = mbox_loss(
   ---> 22                 cls_preds, box_preds, cls_targets, box_targets)
        23             autograd.backward(sum_loss)
        24         trainer.step(1)
   
   /work/mxnet/python/mxnet/gluon/block.py in __call__(self, *args)
       546             hook(self, args)
       547 
   --> 548         out = self.forward(*args)
       549 
       550         for hook in self._forward_hooks.values():
   
   /usr/local/lib/python3.5/dist-packages/gluoncv/loss.py in forward(self, cls_pred, box_pred, cls_target, box_target)
       157             rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
       158             hard_negative = rank < nd.maximum(self._min_hard_negatives, pos.sum(axis=1)
   --> 159                                               * self._negative_mining_ratio).expand_dims(-1)
       160             # mask out if not positive or negative
       161             cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
   
   /work/mxnet/python/mxnet/ndarray/ndarray.py in __lt__(self, other)
       342     def __lt__(self, other):
       343         """x.__lt__(y) <=> x<y <=> mx.nd.lesser(x, y) """
   --> 344         return lesser(self, other)
       345 
       346     def __le__(self, other):
   
   /work/mxnet/python/mxnet/ndarray/ndarray.py in lesser(lhs, rhs)
      3506         lambda x, y: 1 if x < y else 0,
      3507         _internal._lesser_scalar,
   -> 3508         _internal._greater_scalar)
      3509     # pylint: enable= no-member, protected-access
      3510 
   
   /work/mxnet/python/mxnet/ndarray/ndarray.py in _ufunc_helper(lhs, rhs, fn_array, fn_scalar, lfn_scalar, rfn_scalar)
      2706         return lfn_scalar(lhs, float(rhs))
      2707     elif isinstance(rhs, NDArray):
   -> 2708         return fn_array(lhs, rhs)
      2709     else:
      2710         raise TypeError('type %s not supported' % str(type(rhs)))
   
   /work/mxnet/python/mxnet/ndarray/register.py in broadcast_lesser(lhs, rhs, out, name, **kwargs)
   
   /work/mxnet/python/mxnet/_ctypes/ndarray.py in _imperative_invoke(handle, ndargs, keys, vals, out)
        90         c_str_array(keys),
        91         c_str_array([str(s) for s in vals]),
   ---> 92         ctypes.byref(out_stypes)))
        93 
        94     if original_output is not None:
   
   /work/mxnet/python/mxnet/base.py in check_call(ret)
       251     """
       252     if ret != 0:
   --> 253         raise MXNetError(py_str(_LIB.MXGetLastError()))
       254 
       255 
   
   MXNetError: [20:54:33] /work/mxnet/3rdparty/mshadow/../../src/operator/tensor/../elemwise_op_common.h:135: Check failed: assign(&dattr, vec.at(i)): Incompatible attr in node  at 1-th input: expected int32, got float32
   Stack trace:
     [bt] (0) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x3c) [0x7f465e8070bc]
     [bt] (1) /work/mxnet/python/mxnet/../../lib/libmxnet.so(bool mxnet::op::ElemwiseAttr<int, &mxnet::op::type_is_none, &mxnet::op::type_assign, true, &mxnet::op::type_string[abi:cxx11], -1, -1>(nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*, int const&)::{lambda(std::vector<int, std::allocator<int> > const&, unsigned long, char const*)#1}::operator()(std::vector<int, std::allocator<int> > const&, unsigned long, char const*) const+0x346) [0x7f465e953636]
     [bt] (2) /work/mxnet/python/mxnet/../../lib/libmxnet.so(bool mxnet::op::ElemwiseAttr<int, &mxnet::op::type_is_none, &mxnet::op::type_assign, true, &mxnet::op::type_string[abi:cxx11], -1, -1>(nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*, int const&)+0x25d) [0x7f465e954a0d]
     [bt] (3) /work/mxnet/python/mxnet/../../lib/libmxnet.so(bool mxnet::op::ElemwiseType<2, 1>(nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*)+0x34f) [0x7f465eb9bc5f]
     [bt] (4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<bool (nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*), bool (*)(nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*)>::_M_invoke(std::_Any_data const&, nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*&&, std::vector<int, std::allocator<int> >*&&)+0x1d) [0x7f465e8adfcd]
     [bt] (5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::imperative::SetShapeType(mxnet::Context const&, nnvm::NodeAttrs const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, mxnet::DispatchMode*)+0x22d5) [0x7f4661792ba5]
     [bt] (6) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::Imperative::Invoke(mxnet::Context const&, nnvm::NodeAttrs const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&)+0x110) [0x7f466179e1b0]
     [bt] (7) /work/mxnet/python/mxnet/../../lib/libmxnet.so(MXImperativeInvokeImpl(void*, int, void**, int*, void***, int, char const**, char const**)+0x1c9) [0x7f466228acf9]
     [bt] (8) /work/mxnet/python/mxnet/../../lib/libmxnet.so(MXImperativeInvokeEx+0x8f) [0x7f466228b1ff]
   
   MXNetError: [20:54:33] /work/mxnet/3rdparty/mshadow/../../src/operator/tensor/../elemwise_op_common.h:135: Check failed: assign(&dattr, vec.at(i)): Incompatible attr in node  at 1-th input: expected int32, got float32
   Stack trace:
     [bt] (0) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x3c) [0x7f465e8070bc]
     [bt] (1) /work/mxnet/python/mxnet/../../lib/libmxnet.so(bool mxnet::op::ElemwiseAttr<int, &mxnet::op::type_is_none, &mxnet::op::type_assign, true, &mxnet::op::type_string[abi:cxx11], -1, -1>(nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*, int const&)::{lambda(std::vector<int, std::allocator<int> > const&, unsigned long, char const*)#1}::operator()(std::vector<int, std::allocator<int> > const&, unsigned long, char const*) const+0x346) [0x7f465e953636]
     [bt] (2) /work/mxnet/python/mxnet/../../lib/libmxnet.so(bool mxnet::op::ElemwiseAttr<int, &mxnet::op::type_is_none, &mxnet::op::type_assign, true, &mxnet::op::type_string[abi:cxx11], -1, -1>(nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*, int const&)+0x25d) [0x7f465e954a0d]
     [bt] (3) /work/mxnet/python/mxnet/../../lib/libmxnet.so(bool mxnet::op::ElemwiseType<2, 1>(nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*)+0x34f) [0x7f465eb9bc5f]
     [bt] (4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<bool (nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*), bool (*)(nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*)>::_M_invoke(std::_Any_data const&, nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*&&, std::vector<int, std::allocator<int> >*&&)+0x1d) [0x7f465e8adfcd]
     [bt] (5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::imperative::SetShapeType(mxnet::Context const&, nnvm::NodeAttrs const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, mxnet::DispatchMode*)+0x22d5) [0x7f4661792ba5]
     [bt] (6) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::Imperative::Invoke(mxnet::Context const&, nnvm::NodeAttrs const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&)+0x110) [0x7f466179e1b0]
     [bt] (7) /work/mxnet/python/mxnet/../../lib/libmxnet.so(MXImperativeInvokeImpl(void*, int, void**, int*, void***, int, char const**, char const**)+0x1c9) [0x7f466228acf9]
     [bt] (8) /work/mxnet/python/mxnet/../../lib/libmxnet.so(MXImperativeInvokeEx+0x8f) [0x7f466228b1ff]
   
   ======================================================================
   FAIL: test_tutorials.test_amp
   ----------------------------------------------------------------------
   Traceback (most recent call last):
     File "/usr/local/lib/python3.5/dist-packages/nose/case.py", line 198, in runTest
       self.test(*self.arg)
     File "/work/mxnet/tests/tutorials/test_tutorials.py", line 209, in test_amp
       assert _test_tutorial_nb('amp/amp_tutorial')
   AssertionError
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services