You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2020/07/01 03:13:01 UTC
[GitHub] [incubator-mxnet] DongfeiJi commented on issue #18643: ndarray.contrib.boolean_mask can not be hybridize
DongfeiJi commented on issue #18643:
URL: https://github.com/apache/incubator-mxnet/issues/18643#issuecomment-652161754
> Hi @DongfeiJi ,
> It works for me on MXNet 2.0.
> Note that `boolean_mask` doesn't work when mask are all `zero/false`, since the traditional operator doesn't support zero-size array.
>
> ```python
> import mxnet as mx
> from mxnet import gluon
> from mxnet.gluon.loss import Loss, _apply_weighting
>
> class NewTripletLoss(Loss):
> def __init__(self, batch_size_per_gpu, margin=1, weight=None, batch_axis=0, **kwargs):
> super(NewTripletLoss, self).__init__(weight, batch_axis, **kwargs)
> self.batch_size_per_gpu = batch_size_per_gpu
> self.margin = margin
> def hybrid_forward(self, F, embeddings, labels, sample_weight=None):
> N = self.batch_size_per_gpu
> # get distance
> xx = F.power(embeddings, 2).sum(1, keepdims=True).tile((1, self.batch_size_per_gpu))
> dist = F.broadcast_add(xx, xx.transpose())
> dist = F.broadcast_sub(dist, 2 * F.dot(embeddings, embeddings.transpose()))
> dist = F.clip(dist, 1e-12, 1e12)
> # get mask
> labels = F.cast(labels, dtype='float32')
> labels = labels.expand_dims(1).tile((1, self.batch_size_per_gpu))
> is_pos = F.broadcast_equal(labels, labels.transpose())
> is_neg = F.broadcast_not_equal(labels, labels.transpose())
> # hard example mining
> dist_mat = dist.reshape((self.batch_size_per_gpu * self.batch_size_per_gpu,))
> pos_mask = is_pos.reshape((self.batch_size_per_gpu * self.batch_size_per_gpu,))
> dist_ap = F.contrib.boolean_mask(dist_mat, pos_mask).reshape((self.batch_size_per_gpu, -1))
> #dist_ap = F.broadcast_mul(dist_mat, pos_mask).reshape((self.batch_size_per_gpu, -1))
> dist_ap = F.max(dist_ap, axis=1)
> neg_mask = is_neg.reshape((self.batch_size_per_gpu * self.batch_size_per_gpu,))
> dist_an = F.contrib.boolean_mask(dist_mat, neg_mask).reshape((self.batch_size_per_gpu, -1))
> #dist_an = F.broadcast_mul(dist_mat, neg_mask).reshape((self.batch_size_per_gpu, -1))
> dist_an = F.min(dist_an, axis=1)
> # add margin
> margin = F.full(shape=(self.batch_size_per_gpu, 1), val=self.margin)
> loss = F.broadcast_add(F.broadcast_sub(dist_ap, dist_an), margin)
> loss = F.maximum(loss, F.zeros_like(loss))
> # apply weight
> loss = _apply_weighting(F, loss, self._weight, sample_weight)
> return F.mean(loss, axis=self._batch_axis, exclude=True)
>
> block = NewTripletLoss(2)
> block.hybridize()
> embeddings = mx.nd.array([[1.0, 0.0, 1.0], [1.0, 1.0, 0.0]]).reshape((2,3))
> embeddings.attach_grad()
> labels = mx.nd.array([0, 1]).reshape((2, ))
> with mx.autograd.record():
> out = block(embeddings, labels)
> out.sum().backward()
> print(out)
> mx.nd.waitall()
> ```
Thank you again for your reply. Hybridizing directly works fine. However, when the model's initialization is deferred, there is a problem — you can run my code to reproduce it. I hit the error because I use Gluon's Trainer, and my MXNet version is 1.5.
PS: This is a simplified example of my code; for simplicity, I do not use gluon.Trainer.
With NDArray (imperative mode) it works; after hybridize() it does not.
`import mxnet
from mxnet import nd
from mxnet.gluon import nn
from mxnet.gluon.loss import Loss, _apply_weighting
class MyBlock(nn.HybridBlock):
    """Toy backbone: 1x1 convolution -> global average pool -> flatten."""

    def __init__(self, **kwargs):
        super(MyBlock, self).__init__(**kwargs)
        # 1x1 projection to 2048 channels, no bias term.
        self.conv = nn.Conv2D(channels=2048, kernel_size=1, strides=1,
                              padding=0, use_bias=False)
        self.pool = nn.GlobalAvgPool2D()
        self.flatten = nn.Flatten()

    def hybrid_forward(self, F, x):
        # (B, C, H, W) -> (B, 2048, H, W) -> (B, 2048, 1, 1) -> (B, 2048)
        for stage in (self.conv, self.pool, self.flatten):
            x = stage(x)
        return x
class NewTripletLoss(Loss):
    """Batch-hard triplet loss over an (N, D) batch of embeddings.

    For every anchor the hardest positive (maximum distance, same label)
    and hardest negative (minimum distance, different label) are mined,
    and the hinge ``max(0, d_ap - d_an + margin)`` is applied.

    Parameters
    ----------
    batch_size_per_gpu : int
        Number of samples N on this device; the loss builds N x N masks.
    margin : float, default 1
        Margin added to the positive/negative distance gap.
    weight, batch_axis :
        Forwarded to :class:`~mxnet.gluon.loss.Loss`.
    """

    def __init__(self, batch_size_per_gpu, margin=1, weight=None, batch_axis=0, **kwargs):
        super(NewTripletLoss, self).__init__(weight, batch_axis, **kwargs)
        self.batch_size_per_gpu = batch_size_per_gpu
        self.margin = margin

    def hybrid_forward(self, F, embeddings, labels, sample_weight=None):
        N = self.batch_size_per_gpu
        # Pairwise Euclidean distances via ||a||^2 + ||b||^2 - 2 a.b,
        # clipped away from zero before sqrt for numerical stability.
        xx = F.power(embeddings, 2).sum(1, keepdims=True).tile((1, N))
        dist = F.broadcast_add(xx, xx.transpose())
        dist = F.broadcast_sub(dist, 2 * F.dot(embeddings, embeddings.transpose()))
        dist = F.clip(dist, 1e-12, 1e12).sqrt()
        # FIX: removed stray debug `print(dist)` -- once hybridized it only
        # prints the symbolic handle, and it spams stdout in imperative mode.
        # N x N masks: same-label (positive) / different-label (negative) pairs.
        labels = F.cast(labels, dtype='float32')
        labels = labels.expand_dims(1).tile((1, N))
        is_pos = F.broadcast_equal(labels, labels.transpose())
        is_neg = F.broadcast_not_equal(labels, labels.transpose())
        # Hard example mining. boolean_mask needs at least one true entry per
        # mask: an all-false mask yields a zero-size array, which the
        # traditional operators do not support.
        dist_mat = dist.reshape((N * N,))
        pos_mask = is_pos.reshape((N * N,))
        dist_ap = F.contrib.boolean_mask(dist_mat, pos_mask).reshape((N, -1))
        dist_ap = F.max(dist_ap, axis=1)
        neg_mask = is_neg.reshape((N * N,))
        dist_an = F.contrib.boolean_mask(dist_mat, neg_mask).reshape((N, -1))
        dist_an = F.min(dist_an, axis=1)
        # Hinge with margin.
        # NOTE(review): dist_ap/dist_an are shape (N,) while margin is (N, 1);
        # confirm the broadcast produces the intended per-anchor loss shape.
        margin = F.full(shape=(N, 1), val=self.margin)
        loss = F.broadcast_add(F.broadcast_sub(dist_ap, dist_an), margin)
        loss = F.maximum(loss, F.zeros_like(loss))
        # Optional per-sample weighting, then mean over all non-batch axes.
        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        return F.mean(loss, axis=self._batch_axis, exclude=True)
class Model(nn.HybridBlock):
    """Backbone plus triplet loss head; the forward pass returns the loss."""

    def __init__(self, **kwargs):
        super(Model, self).__init__(**kwargs)
        self.net = MyBlock()
        self.loss = NewTripletLoss(batch_size_per_gpu=64, margin=0.35)

    def hybrid_forward(self, F, x, y):
        # Embed the batch, then score the embeddings against the labels.
        embeddings = self.net(x)
        return self.loss(embeddings, y)
if __name__ == "__main__":
import numpy as np
import random
feat = np.random.rand(64, 3, 32, 100)
feat = nd.array(feat)
# feat = nd.random.randn(64, 2048)
target = []
label = [_ for _ in range(16)]
for i in range(4):
target += label
random.shuffle(target)
target = nd.array(target)
model = Model()
model.initialize()
model.hybridize()
loss = model(feat, target)
print(loss)
`
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org