Posted to commits@singa.apache.org by wa...@apache.org on 2018/08/22 14:54:58 UTC
[2/5] incubator-singa git commit: SINGA-388 Develop some RNN layers by calling tiny operations like matmul, addbias
SINGA-388 Develop some RNN layers by calling tiny operations like matmul, addbias
- Develop the LSTM layer by calling tiny operations.
- Add the operations needed by the LSTM layer to autograd.py.
- Redesign the structure of the RNN layer.
- The LSTM layer has passed testing (it returns the correct number of gradients); see the usage sketch below.
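
A minimal usage sketch of the new LSTM layer (not part of the commit): the sizes bs, seq_len, in_dim and hidden are illustrative assumptions, and the Tensor construction mirrors the patterns already used in python/singa.

from singa import autograd, tensor

# enable caching of intermediate values needed by backward()
autograd.training = True

# illustrative sizes (assumptions, not from the commit)
bs, seq_len, in_dim, hidden = 2, 3, 4, 5

# one input tensor per time step
xs = [tensor.Tensor(shape=(bs, in_dim)) for _ in range(seq_len)]
for x in xs:
    x.gaussian(0.0, 1.0)

# initial hidden and cell states
h0 = tensor.Tensor(shape=(bs, hidden))
c0 = tensor.Tensor(shape=(bs, hidden))
h0.set_value(0.0)
c0.set_value(0.0)

lstm = autograd.LSTM(input_size=in_dim, hidden_size=hidden)
outputs, h, c = lstm(h0, c0, *xs)  # one output per step, plus the final h and c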
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/5dc17b91
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/5dc17b91
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/5dc17b91
Branch: refs/heads/master
Commit: 5dc17b91e045e6318d224bebee8912da6c646596
Parents: 7df6a5d
Author: xuewanqi <xu...@outlook.com>
Authored: Mon Aug 13 14:11:37 2018 +0000
Committer: xuewanqi <xu...@outlook.com>
Committed: Thu Aug 16 11:40:25 2018 +0000
----------------------------------------------------------------------
python/singa/autograd.py | 143 ++++++++++++++++++++++++++++++++++++++----
1 file changed, 131 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5dc17b91/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 4c7959c..b18e08e 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -985,6 +985,41 @@ class Tanh(Operation):
def tanh(x):
    return Tanh()(x)[0]
+class Sigmoid(Operation):
+
+    def forward(self, x):
+        out = singa.Sigmoid(x)
+        if training:
+            self.cache = (out,)
+        return out
+
+    def backward(self, dy):
+        dx = singa.MultFloat(self.cache[0], -1.0)
+        dx = singa.AddFloat(dx, 1.0)
+        dx = singa.__mul__(self.cache[0], dx)
+        dx = singa.__mul__(dy, dx)
+        return dx
+
+
+def sigmoid(x):
+    return Sigmoid()(x)[0]
+
+
+class ElemMatmul(Operation):
+
+    def forward(self, x1, x2):
+        if training:
+            self.cache = (x1, x2)
+        return singa.__mul__(x1, x2)
+
+    def backward(self, dy):
+        dx1 = singa.__mul__(dy, self.cache[1])
+        dx2 = singa.__mul__(dy, self.cache[0])
+        return dx1, dx2
+
+
+def elemmatmul(x, y):
+    return ElemMatmul()(x, y)[0]
def add_all(*xs):
    assert len(xs) > 2
@@ -993,9 +1028,22 @@ def add_all(*xs):
        y=add(y, x)
    return
+class RNN(Layer):
+    def __init__(self):
+        raise NotImplementedError
-class Vanilla_RNN(Layer):
+    def __call__(self, h0, *xs):
+        batchsize=xs[0].shape[0]
+        out=[]
+        h = self.step_forward(xs[0], h0, self.Wx, self.Wh, self.b)
+        out.append(h)
+        for x in xs[1:]:
+            assert x.shape[0] == batchsize
+            h = self.step_forward(x, h, self.Wx, self.Wh, self.b)
+            out.append(h)
+        return out, h
+class Vanilla_RNN(RNN):
    def __init__(self, input_size, hidden_size, num_layers=1, nonlinearity='tanh', bias=True, batch_first=False, dropout=0, bidirectional=False):
        self.nonlinearity=nonlinearity
@@ -1011,17 +1059,6 @@ class Vanilla_RNN(Layer):
        self.b = Tensor(shape=B_shape, requires_grad=True, stores_grad=True)
        self.b.set_value(0.0)
-    def __call__(self, h0, *xs):
-        batchsize=xs[0].shape[0]
-        self.out=[]
-        h = self.step_forward(xs[0], h0, self.Wx, self.Wh, self.b)
-        self.out.append(h)
-        for x in xs[1:]:
-            assert x.shape[0] == batchsize
-            h = self.step_forward(x, h, self.Wx, self.Wh, self.b)
-            self.out.append(h)
-        return self.out
-
    def step_forward(self, x, h, Wx, Wh, b):
        y1=matmul(x, Wx)
        y2=matmul(h, Wh)
@@ -1035,4 +1072,86 @@ class Vanilla_RNN(Layer):
            raise ValueError
        return y
+class LSTM(RNN):
+
+    def __init__(self, input_size, hidden_size, nonlinearity='tanh', num_layers=1, bias=True, batch_first=False, dropout=0, bidirectional=False):
+        self.nonlinearity=nonlinearity
+
+        Wx_shape = (input_size, hidden_size)
+        self.Wx = []
+        for i in range(4):
+            w = Tensor(shape=Wx_shape, requires_grad=True, stores_grad=True)
+            w.gaussian(0.0, 1.0)
+            self.Wx.append(w)
+
+        Wh_shape = (hidden_size, hidden_size)
+        self.Wh = []
+        for i in range(4):
+            w = Tensor(shape=Wh_shape, requires_grad=True, stores_grad=True)
+            w.gaussian(0.0, 1.0)
+            self.Wh.append(w)
+
+        Bx_shape = (hidden_size,)
+        self.Bx = []
+        for i in range(4):
+            b = Tensor(shape=Bx_shape, requires_grad=True, stores_grad=True)
+            b.set_value(0.0)
+            self.Bx.append(b)
+
+        Bh_shape = (hidden_size,)
+        self.Bh = []
+        for i in range(4):
+            b = Tensor(shape=Bh_shape, requires_grad=True, stores_grad=True)
+            b.set_value(0.0)
+            self.Bh.append(b)
+
+    def __call__(self, h0, c0, *xs):
+        batchsize = xs[0].shape[0]
+        out = []
+        h, c = self.step_forward(
+            xs[0], h0, c0, self.Wx, self.Wh, self.Bx, self.Bh)
+        out.append(h)
+        for x in xs[1:]:
+            assert x.shape[0] == batchsize
+            h, c = self.step_forward(
+                x, h, c, self.Wx, self.Wh, self.Bx, self.Bh)
+            out.append(h)
+        return out, h, c
+
+    def step_forward(self, x, h, c, Wx, Wh, Bx, Bh):
+        y1 = matmul(x, Wx[0])
+        y1 = add_bias(y1, Bx[0], axis=0)
+        y2 = matmul(h, Wh[0])
+        y2 = add_bias(y2, Bh[0], axis=0)
+        i = add(y1, y2)
+        i = sigmoid(i)
+
+        y1 = matmul(x, Wx[1])
+        y1 = add_bias(y1, Bx[1], axis=0)
+        y2 = matmul(h, Wh[1])
+        y2 = add_bias(y2, Bh[1], axis=0)
+        f = add(y1, y2)
+        f = sigmoid(f)
+
+        y1 = matmul(x, Wx[2])
+        y1 = add_bias(y1, Bx[2], axis=0)
+        y2 = matmul(h, Wh[2])
+        y2 = add_bias(y2, Bh[2], axis=0)
+        o = add(y1, y2)
+        o = sigmoid(o)
+
+        y1 = matmul(x, Wx[3])
+        y1 = add_bias(y1, Bx[3], axis=0)
+        y2 = matmul(h, Wh[3])
+        y2 = add_bias(y2, Bh[3], axis=0)
+        g = add(y1, y2)
+        g = tanh(g)
+
+        cout1 = elemmatmul(f, c)
+        cout2 = elemmatmul(i, g)
+        cout = add(cout1, cout2)
+
+        hout = tanh(cout)
+        hout = elemmatmul(o, hout)
+        return hout, cout
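
For reference, step_forward above implements the standard LSTM cell. Below is a NumPy sketch of the same math (an illustration, not part of the commit); it assumes the same (batch, feature) layout and the same gate ordering i, f, o, g as the Wx/Wh/Bx/Bh lists in the diff.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x, h, c, Wx, Wh, Bx, Bh):
    # gate order matches the diff: input (i), forget (f), output (o), candidate (g)
    i = sigmoid(x @ Wx[0] + Bx[0] + h @ Wh[0] + Bh[0])
    f = sigmoid(x @ Wx[1] + Bx[1] + h @ Wh[1] + Bh[1])
    o = sigmoid(x @ Wx[2] + Bx[2] + h @ Wh[2] + Bh[2])
    g = np.tanh(x @ Wx[3] + Bx[3] + h @ Wh[3] + Bh[3])
    c_new = f * c + i * g          # elemmatmul + add in the diff
    h_new = o * np.tanh(c_new)     # tanh then elemmatmul in the diff
    return h_new, c_new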