Posted to commits@singa.apache.org by wa...@apache.org on 2018/08/16 08:44:29 UTC

[1/4] incubator-singa git commit: SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

Repository: incubator-singa
Updated Branches:
  refs/heads/master f2f4d1f9c -> 770d6cdb6


SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

One of the alternative solutions, the simplest one:
- output every intermediate gradient as soon as it is calculated and sum these gradients together in the function gradients() (see the sketch below).
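
A minimal, standalone sketch of this accumulation rule (plain Python with float
gradients; the list-of-pairs input is illustrative and not the actual
singa.autograd API):

    def gradients(partial_grads):
        # partial_grads yields (param, dgrad) pairs; the same param may appear
        # several times, once per path that reaches it in the backward pass.
        grads = {}
        for p, dp in partial_grads:
            if p not in grads:
                grads[p] = dp
            else:
                grads[p] += dp  # sum the partial gradients from different paths
        return grads

    # 'w' receives contributions from two branches of the graph.
    print(gradients([('w', 0.5), ('b', 1.0), ('w', 0.25)]))
    # {'w': 0.75, 'b': 1.0}

The caller only ever sees one summed gradient per parameter, even though the
backward generator emitted each contribution separately.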


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6c28abde
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6c28abde
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6c28abde

Branch: refs/heads/master
Commit: 6c28abdeb6929334ba10d327fe1fd80e2d0b604c
Parents: f2f4d1f
Author: xuewanqi <xu...@outlook.com>
Authored: Thu Aug 9 15:08:06 2018 +0000
Committer: xuewanqi <xu...@outlook.com>
Committed: Mon Aug 13 05:59:06 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6c28abde/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index a084764..007af27 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -64,7 +64,10 @@ def infer_dependency(op):
 def gradients(y, dy=None):
     grads = {}  # mapping: x->dx if x.stores_grad
     for p, dp in backward(y, dy):
-        gradients[p] = dp
+        if not grads.has_key(p):
+            grads[p] = dp
+        else: 
+            grads[p] += dp
     return grads
 
 
@@ -96,7 +99,13 @@ def backward(y, dy=None):
     not_ready = {}  # mapping: op->[dy]
 
     if y.stores_grad:
-        gradients[y] = dy
+        #gradients[y] = dy
+        if isinstance(dy, float):
+            g=np.array(dy)
+        else:
+            g=dy
+        tg = Tensor(device=g.device(), data=g)
+        yield (y, tg)
 
     while len(ready) > 0:
         op, dys = ready.pop()
@@ -135,7 +144,12 @@ def backward(y, dy=None):
                     dxs[y_idx] += dx
             if y_stores_grad:
                 # store the gradient for final return, e.g. if x is parameter
-                g = not_ready[src_op][y_idx]
+
+                # g = not_ready[src_op][y_idx]
+
+                g = dx  # cannot confirm that the gradient of a parameter has been accumulated completely; the engine emits a
+                        # (partial) gradient as soon as it is computed, which may corrupt the records some optimizers keep per parameter.
+
                 tg = Tensor(device=g.device(), data=g)
                 yield (y, tg)
             dependency[src_op] -= 1


[3/4] incubator-singa git commit: SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

Posted by wa...@apache.org.
SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

Optimize the design of the autograd engine:
- differentiate the creator of inputs from the creator of parameters, even though both are Dummy operations.
- this avoids unnecessary memory use, mainly from storing gradients that are never needed (see the sketch below).
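
A rough sketch of the resulting counting rule (the Op and Dummy classes below are
simplified stand-ins, not the real singa.autograd operations):

    from collections import Counter, deque

    class Op:
        def __init__(self, src=(), stores_grad=False):
            self.src = list(src)            # upstream (creator) operations
            self.stores_grad = stores_grad

    class Dummy(Op):
        # stand-in for the creator of an input or a parameter tensor
        pass

    def infer_dependency(op):
        # Count how many downstream ops depend on each reachable op.  A Dummy
        # creator is counted only if its tensor stores a gradient (a parameter),
        # so the creators of plain inputs never reserve space for gradients.
        count = Counter()
        queue = deque([op])
        while queue:
            cur = queue.pop()
            for src in cur.src:
                if src not in count:
                    if isinstance(src, Dummy) and not src.stores_grad:
                        continue            # skip input creators entirely
                    count[src] = 0
                    queue.append(src)
                count[src] += 1
        return count

    w = Dummy(stores_grad=True)     # parameter creator: counted
    x = Dummy(stores_grad=False)    # input creator: ignored
    y = Op(src=[w, x])
    deps = infer_dependency(y)
    print(w in deps, x in deps)     # True False

Only the parameter's creator ends up in the counter, so no gradient is ever
stored for an ordinary input.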


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b55b046c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b55b046c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b55b046c

Branch: refs/heads/master
Commit: b55b046ccac33876a28861cba6badbddfae75788
Parents: 2fea345
Author: xuewanqi <xu...@outlook.com>
Authored: Fri Aug 10 05:57:18 2018 +0000
Committer: xuewanqi <xu...@outlook.com>
Committed: Mon Aug 13 06:07:18 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b55b046c/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 94214fc..56b5498 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -54,11 +54,18 @@ def infer_dependency(op):
         for src_op, _, _, _ in cur_op.src:
             if src_op not in dependency_count:
                 # dependency[src_op] = [Counter() for _ in src_op.y_id2idx]
-                dependency_count[src_op] = 0
-                queue.append(src_op)
+                if isinstance(src_op, Dummy):
+                    # only when a Dummy operator needs to store grads does its dependency need to be counted.
+                    if src_op.stores_grad:
+                        dependency_count[src_op] = 0
+                        queue.append(src_op)
+                else:
+                    dependency_count[src_op] = 0
+                    queue.append(src_op)
             # y_idx = src_op.y_id2idx[x_id]
             # dependency[src_op][y_idx][cur_op] += 1
-            dependency_count[src_op] += 1
+            if dependency_count.has_key(src_op):
+                dependency_count[src_op] += 1
     return dependency_count
 
 
@@ -127,6 +134,11 @@ def backward(y, dy=None):
             # the gradient of all its outputs are available, i.e. all children
             # operations have been backwarded.
             # y is None if y.stores_grad is false; otherwise it is a Tensor
+
+            if isinstance(src_op, Dummy):
+                if not src_op.stores_grad:
+                    continue
+                    
             y_idx = src_op.y_id2idx[x_id]
             if src_op not in not_ready:
                 # src_op may have multiple outputs
@@ -253,6 +265,7 @@ class Dummy(Operation):
         self.name = name
         self.src = []
         self.y_id2idx = {id(tensor): 0}
+        self.stores_grad = tensor.stores_grad
         self.requires_grad = False
 
 


[4/4] incubator-singa git commit: SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

Posted by wa...@apache.org.
SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

- tested the modified engine by running the example networks in the examples/autograd folder and fixed some bugs; all the files now run without error.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/770d6cdb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/770d6cdb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/770d6cdb

Branch: refs/heads/master
Commit: 770d6cdb65ae528cfc9cae6e357198648c088168
Parents: b55b046
Author: xuewanqi <xu...@outlook.com>
Authored: Tue Aug 14 02:55:52 2018 +0000
Committer: xuewanqi <xu...@outlook.com>
Committed: Tue Aug 14 02:55:52 2018 +0000

----------------------------------------------------------------------
 examples/autograd/resnet.py | 8 ++++----
 python/singa/opt.py         | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/770d6cdb/examples/autograd/resnet.py
----------------------------------------------------------------------
diff --git a/examples/autograd/resnet.py b/examples/autograd/resnet.py
old mode 100644
new mode 100755
index 72c33ed..fab8129
--- a/examples/autograd/resnet.py
+++ b/examples/autograd/resnet.py
@@ -227,8 +227,8 @@ def resnet152(pretrained=False, **kwargs):
 if __name__ == '__main__':
     model = resnet18()
     print('Start intialization............')
-    dev = device.create_cuda_gpu_on(1)
-
+    dev = device.create_cuda_gpu_on(0)
+    #dev = device.create_cuda_gpu()
     niters = 200
     batch_size = 16
     IMG_SIZE = 224
@@ -248,5 +248,5 @@ if __name__ == '__main__':
             loss = autograd.softmax_cross_entropy(x, ty)
             for p, g in autograd.backward(loss):
                 # print(p.shape, g.shape)
-                # sgd.update(p, g)
-                pass
+                sgd.update(p, g)
+                #pass

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/770d6cdb/python/singa/opt.py
----------------------------------------------------------------------
diff --git a/python/singa/opt.py b/python/singa/opt.py
old mode 100644
new mode 100755
index 6c59f28..f744f57
--- a/python/singa/opt.py
+++ b/python/singa/opt.py
@@ -30,7 +30,7 @@ class Optimizer(object):
 
     def __init__(self, config):
         self.default_config = config
-        self.step = 0
+        self.iter = 0
         self.param2config = {}
         self.param2state = {}
 
@@ -46,7 +46,7 @@ class Optimizer(object):
 
     def step(self):
         r"""To increment the step counter"""
-        self.step += 1
+        self.iter += 1
 
     def register(self, param_group, config):
         for param in param_group:
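
The opt.py change fixes a Python name clash rather than anything algorithmic:
assigning self.step = 0 in __init__ shadows the step() method on the instance,
so a later opt.step() call fails. A toy reproduction of the clash (not the SINGA
Optimizer itself):

    class Optimizer:
        def __init__(self):
            self.step = 0      # instance attribute shadows the step() method

        def step(self):
            self.step += 1

    opt = Optimizer()
    opt.step()                 # TypeError: 'int' object is not callable

Renaming the counter attribute to self.iter, as in the diff above, leaves the
method lookup intact.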


[2/4] incubator-singa git commit: SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

Posted by wa...@apache.org.
SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

Another solution:
- let Dummy operators take part in the dependency counting system
- modified the former backward function accordingly (see the sketch below)
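
A schematic sketch of the deferred-output rule this introduces (plain Python with
made-up names, not the real backward engine): a gradient is yielded only once the
dependency count of its source op reaches zero, so the optimizer always receives
a fully accumulated gradient.

    from collections import Counter

    def emit_complete_grads(contributions, dependency):
        # contributions: (src_op, partial_grad) pairs in the order the backward
        # pass produces them; dependency: how many downstream ops still have to
        # deliver a gradient to each src_op.
        pending = {}
        for src_op, dx in contributions:
            pending[src_op] = pending.get(src_op, 0) + dx
            dependency[src_op] -= 1
            if dependency[src_op] == 0:
                yield src_op, pending.pop(src_op)   # complete, safe to emit

    deps = Counter({'w': 2, 'b': 1})
    parts = [('w', 0.5), ('b', 1.0), ('w', 0.25)]
    print(list(emit_complete_grads(parts, deps)))
    # [('b', 1.0), ('w', 0.75)]

This is the trade-off noted in the in-code comment below: the gradient for 'w' is
delayed until both of its contributions have arrived.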


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/2fea345c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/2fea345c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/2fea345c

Branch: refs/heads/master
Commit: 2fea345cf13df1e3a2511c5bb80732647abc4b45
Parents: 6c28abd
Author: xuewanqi <xu...@outlook.com>
Authored: Fri Aug 10 05:12:31 2018 +0000
Committer: xuewanqi <xu...@outlook.com>
Committed: Mon Aug 13 06:03:28 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/2fea345c/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 007af27..94214fc 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -44,14 +44,15 @@ def infer_dependency(op):
         a Counter instance with the operation as the key,
         and the number of operations that are depending on it as the value
     '''
-    # dependency = {}
+    # do not count the dependency of the current op.
+    # if the current op is not a terminal op, this function may only count
+    # the dependencies of one branch.
     dependency_count = Counter()
     queue = deque([op])
     while len(queue) > 0:
         cur_op = queue.pop()
         for src_op, _, _, _ in cur_op.src:
-            if src_op not in dependency_count and \
-                    (not isinstance(src_op, Dummy)):
+            if src_op not in dependency_count:
                 # dependency[src_op] = [Counter() for _ in src_op.y_id2idx]
                 dependency_count[src_op] = 0
                 queue.append(src_op)
@@ -64,10 +65,7 @@ def infer_dependency(op):
 def gradients(y, dy=None):
     grads = {}  # mapping: x->dx if x.stores_grad
     for p, dp in backward(y, dy):
-        if not grads.has_key(p):
-            grads[p] = dp
-        else: 
-            grads[p] += dp
+        grads[p] = dp
     return grads
 
 
@@ -142,20 +140,21 @@ def backward(y, dy=None):
                     # add the gradient from another children operation that
                     # uses y_idx'th output of src_op as input arg
                     dxs[y_idx] += dx
-            if y_stores_grad:
-                # store the gradient for final return, e.g. if x is parameter
-
-                # g = not_ready[src_op][y_idx]
+            
+            dependency[src_op] -= 1
 
-                g = dx  # cannot confirm that the gradient of a parameter has been accumulated completely; the engine emits a
-                        # (partial) gradient as soon as it is computed, which may corrupt the records some optimizers keep per parameter.
+            if y_stores_grad:
+                if dependency[src_op] == 0:
+                    # store the gradient for final return, e.g. if x is parameter
+                    # may delay the output: the gradient is emitted only after src_op itself is ready, not as soon as the current outlet of src_op is ready.
+                    g = not_ready[src_op][y_idx]
+                    tg = Tensor(device=g.device(), data=g)
+                    yield (y, tg)
 
-                tg = Tensor(device=g.device(), data=g)
-                yield (y, tg)
-            dependency[src_op] -= 1
             if src_op.requires_grad is True:
                 if dependency[src_op] == 0:
                     if not isinstance(src_op, Dummy):
+                        # Dummy can be in the not_ready list but cannot be in the ready list.
                         ready.append((src_op, not_ready[src_op]))
                     del not_ready[src_op]
         del op  # delete the operation to free all tensors from this op