Posted to commits@singa.apache.org by wa...@apache.org on 2018/08/22 14:54:58 UTC

[2/5] incubator-singa git commit: SINGA-388 Develop some RNN layers by calling tiny operations like matmul, addbias

SINGA-388 Develop some RNN layers by calling tiny operations like matmul, addbias

- Develop the LSTM layer by calling tiny operations.
- Add the operations required by the LSTM layer to autograd.py.
- Redesign the structure of the RNN layer.
- The LSTM layer has passed the test (it returns the correct number of gradients).
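
For reference, the step computation that the patch composes out of matmul, add_bias, sigmoid, tanh and elemmatmul is the standard LSTM cell update; the notation below is added here for readability only, with superscripts 0-3 matching the indices of the four weight/bias groups in the code:

    i_t = \sigma(x_t W_x^{(0)} + b_x^{(0)} + h_{t-1} W_h^{(0)} + b_h^{(0)})
    f_t = \sigma(x_t W_x^{(1)} + b_x^{(1)} + h_{t-1} W_h^{(1)} + b_h^{(1)})
    o_t = \sigma(x_t W_x^{(2)} + b_x^{(2)} + h_{t-1} W_h^{(2)} + b_h^{(2)})
    g_t = \tanh(x_t W_x^{(3)} + b_x^{(3)} + h_{t-1} W_h^{(3)} + b_h^{(3)})
    c_t = f_t \odot c_{t-1} + i_t \odot g_t
    h_t = o_t \odot \tanh(c_t)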


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/5dc17b91
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/5dc17b91
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/5dc17b91

Branch: refs/heads/master
Commit: 5dc17b91e045e6318d224bebee8912da6c646596
Parents: 7df6a5d
Author: xuewanqi <xu...@outlook.com>
Authored: Mon Aug 13 14:11:37 2018 +0000
Committer: xuewanqi <xu...@outlook.com>
Committed: Thu Aug 16 11:40:25 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py | 143 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 131 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5dc17b91/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 4c7959c..b18e08e 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -985,6 +985,41 @@ class Tanh(Operation):
 def tanh(x):
     return Tanh()(x)[0]
 
+class Sigmoid(Operation):
+
+    def forward(self, x):
+        out = singa.Sigmoid(x)
+        if training:
+            self.cache = (out,)
+        return out
+
+    def backward(self, dy):
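+        # derivative of sigmoid: dx = dy * out * (1 - out), using the cached forward output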
+        dx = singa.MultFloat(self.cache[0], -1.0)
+        dx = singa.AddFloat(dx, 1.0)
+        dx = singa.__mul__(self.cache[0], dx)
+        dx = singa.__mul__(dy, dx)
+        return dx
+
+
+def sigmoid(x):
+    return Sigmoid()(x)[0]
+
+
+class ElemMatmul(Operation):
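+    # element-wise (Hadamard) multiplication of two tensors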
+
+    def forward(self, x1, x2):
+        if training:
+            self.cache = (x1, x2)
+        return singa.__mul__(x1, x2)
+
+    def backward(self, dy):
+        dx1 = singa.__mul__(dy, self.cache[1])
+        dx2 = singa.__mul__(dy, self.cache[0])
+        return dx1, dx2
+
+
+def elemmatmul(x, y):
+    return ElemMatmul()(x, y)[0]
 
 def add_all(*xs):
     assert len(xs) > 2
@@ -993,9 +1028,22 @@ def add_all(*xs):
         y=add(y, x)
     return y
 
+class RNN(Layer):
+    def __init__(self):
+        raise NotImplementedError
 
-class Vanilla_RNN(Layer):
+    def __call__(self, h0, *xs):
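+        # unroll over the time steps in xs, feeding the hidden state forward at each step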
+        batchsize = xs[0].shape[0]
+        out = []
+        h = self.step_forward(xs[0], h0, self.Wx, self.Wh, self.b)
+        out.append(h)
+        for x in xs[1:]:
+            assert x.shape[0] == batchsize
+            h = self.step_forward(x, h, self.Wx, self.Wh, self.b)
+            out.append(h)
+        return out, h
 
+class Vanilla_RNN(RNN):
     def __init__(self, input_size, hidden_size, num_layers=1, nonlinearity='tanh', bias=True, batch_first=False, dropout=0, bidirectional=False):
         self.nonlinearity=nonlinearity
 
@@ -1011,17 +1059,6 @@ class Vanilla_RNN(Layer):
         self.b = Tensor(shape=B_shape, requires_grad=True, stores_grad=True)
         self.b.set_value(0.0)
 
-    def __call__(self, h0, *xs):
-        batchsize=xs[0].shape[0]
-        self.out=[]
-        h = self.step_forward(xs[0], h0, self.Wx, self.Wh, self.b)
-        self.out.append(h)
-        for x in xs[1:]:
-            assert x.shape[0] == batchsize
-            h = self.step_forward(x, h, self.Wx, self.Wh, self.b)
-            self.out.append(h)
-        return self.out
-
     def step_forward(self, x, h, Wx, Wh, b):
         y1=matmul(x, Wx)
         y2=matmul(h, Wh)
@@ -1035,4 +1072,86 @@ class Vanilla_RNN(Layer):
             raise ValueError
         return y
 
+class LSTM(RNN):
+
+    def __init__(self, input_size, hidden_size, nonlinearity='tanh', num_layers=1, bias=True, batch_first=False, dropout=0, bidirectional=False):
+        self.nonlinearity = nonlinearity
+
+        Wx_shape = (input_size, hidden_size)
+        self.Wx = []
+        for i in range(4):
+            w = Tensor(shape=Wx_shape, requires_grad=True, stores_grad=True)
+            w.gaussian(0.0, 1.0)
+            self.Wx.append(w)
+
+        Wh_shape = (hidden_size, hidden_size)
+        self.Wh = []
+        for i in range(4):
+            w = Tensor(shape=Wh_shape, requires_grad=True, stores_grad=True)
+            w.gaussian(0.0, 1.0)
+            self.Wh.append(w)
+
+        Bx_shape = (hidden_size,)
+        self.Bx = []
+        for i in range(4):
+            b = Tensor(shape=Bx_shape, requires_grad=True, stores_grad=True)
+            b.set_value(0.0)
+            self.Bx.append(b)
+
+        Bh_shape = (hidden_size,)
+        self.Bh = []
+        for i in range(4):
+            b = Tensor(shape=Bh_shape, requires_grad=True, stores_grad=True)
+            b.set_value(0.0)
+            self.Bh.append(b)
+
+    def __call__(self, h0, c0, *xs):
+        batchsize = xs[0].shape[0]
+        out = []
+        h, c = self.step_forward(
+            xs[0], h0, c0, self.Wx, self.Wh, self.Bx, self.Bh)
+        out.append(h)
+        for x in xs[1:]:
+            assert x.shape[0] == batchsize
+            h, c = self.step_forward(
+                x, h, c, self.Wx, self.Wh, self.Bx, self.Bh)
+            out.append(h)
+        return out, h, c
+
+    def step_forward(self, x, h, c, Wx, Wh, Bx, Bh):
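+        # input gate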
+        y1 = matmul(x, Wx[0])
+        y1 = add_bias(y1, Bx[0], axis=0)
+        y2 = matmul(h, Wh[0])
+        y2 = add_bias(y2, Bh[0], axis=0)
+        i = add(y1, y2)
+        i = sigmoid(i)
+
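+        # forget gate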
+        y1 = matmul(x, Wx[1])
+        y1 = add_bias(y1, Bx[1], axis=0)
+        y2 = matmul(h, Wh[1])
+        y2 = add_bias(y2, Bh[1], axis=0)
+        f = add(y1, y2)
+        f = sigmoid(f)
+
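+        # output gate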
+        y1 = matmul(x, Wx[2])
+        y1 = add_bias(y1, Bx[2], axis=0)
+        y2 = matmul(h, Wh[2])
+        y2 = add_bias(y2, Bh[2], axis=0)
+        o = add(y1, y2)
+        o = sigmoid(o)
+
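+        # candidate cell state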
+        y1 = matmul(x, Wx[3])
+        y1 = add_bias(y1, Bx[3], axis=0)
+        y2 = matmul(h, Wh[3])
+        y2 = add_bias(y2, Bh[3], axis=0)
+        g = add(y1, y2)
+        g = tanh(g)
+
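+        # new cell state: c = f * c_prev + i * g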
+        cout1 = elemmatmul(f, c)
+        cout2 = elemmatmul(i, g)
+        cout = add(cout1, cout2)
+
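+        # new hidden state: h = o * tanh(c)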
+        hout = tanh(cout)
+        hout = elemmatmul(o, hout)
+        return hout, cout
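
A minimal usage sketch of the new LSTM layer, assuming the constructor and __call__ signatures added in this patch; the tensor setup below (shapes, random initialisation, the autograd.training flag) is illustrative only and not part of the commit:

    from singa import autograd
    from singa.tensor import Tensor

    autograd.training = True  # let the operations cache what backward() needs

    input_size, hidden_size, seq_len, batch = 3, 2, 4, 5

    # one input tensor per time step, plus the initial hidden and cell states
    xs = [Tensor(shape=(batch, input_size)) for _ in range(seq_len)]
    for x in xs:
        x.gaussian(0.0, 1.0)
    h0 = Tensor(shape=(batch, hidden_size))
    c0 = Tensor(shape=(batch, hidden_size))
    h0.set_value(0.0)
    c0.set_value(0.0)

    rnn = autograd.LSTM(input_size, hidden_size)
    out, h, c = rnn(h0, c0, *xs)  # 'out' collects the hidden state of every step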