Posted to commits@singa.apache.org by wa...@apache.org on 2018/08/16 08:44:31 UTC

[3/4] incubator-singa git commit: SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

Optimize the design of the autograd engine:
- differentiate the creators of inputs from the creators of parameters, even though both are instances of the Dummy class.
- this avoids unnecessary memory use, mainly from storing gradients that are never needed (see the sketch below).
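
To make the distinction concrete, here is a minimal sketch of how a
stores_grad flag lets the engine tell a parameter's Dummy creator apart
from an input's. The Tensor and Dummy classes below are simplified
stand-ins for illustration, not SINGA's real implementations:

    class Tensor(object):
        def __init__(self, stores_grad=False):
            # parameters keep their gradients; plain inputs do not
            self.stores_grad = stores_grad

    class Dummy(object):
        # stands in as the 'creator' of a tensor no real operation produced
        def __init__(self, tensor):
            # propagate the flag, mirroring the Dummy.__init__ change below
            self.stores_grad = tensor.stores_grad

    input_creator = Dummy(Tensor(stores_grad=False))  # input: grad never used
    param_creator = Dummy(Tensor(stores_grad=True))   # parameter: grad kept

    # the backward engine only tracks creators whose gradients are stored
    tracked = [d for d in (input_creator, param_creator) if d.stores_grad]
    assert tracked == [param_creator]

With this flag in place the engine can skip bookkeeping for input
creators entirely, which is what the patch below does in
infer_dependency and backward.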


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b55b046c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b55b046c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b55b046c

Branch: refs/heads/master
Commit: b55b046ccac33876a28861cba6badbddfae75788
Parents: 2fea345
Author: xuewanqi <xu...@outlook.com>
Authored: Fri Aug 10 05:57:18 2018 +0000
Committer: xuewanqi <xu...@outlook.com>
Committed: Mon Aug 13 06:07:18 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b55b046c/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 94214fc..56b5498 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -54,11 +54,18 @@ def infer_dependency(op):
         for src_op, _, _, _ in cur_op.src:
             if src_op not in dependency_count:
                 # dependency[src_op] = [Counter() for _ in src_op.y_id2idx]
-                dependency_count[src_op] = 0
-                queue.append(src_op)
+                if isinstance(src_op, Dummy):
+                    # a Dummy operator's dependency is counted only when it needs to store grads.
+                    if src_op.stores_grad:
+                        dependency_count[src_op] = 0
+                        queue.append(src_op)
+                else:
+                    dependency_count[src_op] = 0
+                    queue.append(src_op)
             # y_idx = src_op.y_id2idx[x_id]
             # dependency[src_op][y_idx][cur_op] += 1
-            dependency_count[src_op] += 1
+            if src_op in dependency_count:
+                dependency_count[src_op] += 1
     return dependency_count
 
 
@@ -127,6 +134,11 @@ def backward(y, dy=None):
             # the gradients of all its outputs are available, i.e. all children
             # operations have been backwarded.
             # y is None if y.stores_grad is false; otherwise it is a Tensor
+
+            if isinstance(src_op, Dummy):
+                if not src_op.stores_grad:
+                    continue
+
             y_idx = src_op.y_id2idx[x_id]
             if src_op not in not_ready:
                 # src_op may have multiple outputs
@@ -253,6 +265,7 @@ class Dummy(Operation):
         self.name = name
         self.src = []
         self.y_id2idx = {id(tensor): 0}
+        self.stores_grad = tensor.stores_grad
         self.requires_grad = False
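
For reference, the patched dependency-counting walk can be sketched as a
self-contained, runnable script. Operation and Dummy below are minimal
stand-ins; the real autograd.py records richer src tuples of the form
(src_op, x_id, y, y_stores_grad):

    from collections import deque

    class Operation(object):
        def __init__(self, srcs=()):
            # mimic autograd's src records (src_op, x_id, y, y_stores_grad)
            self.src = [(op, None, None, None) for op in srcs]

    class Dummy(Operation):
        def __init__(self, stores_grad=False):
            super(Dummy, self).__init__()
            self.stores_grad = stores_grad

    def infer_dependency(op):
        # count, for every reachable operation, how many consumers wait on it
        dependency_count = {}
        queue = deque([op])
        while queue:
            cur_op = queue.popleft()
            for src_op, _, _, _ in cur_op.src:
                if src_op not in dependency_count:
                    if isinstance(src_op, Dummy):
                        # only a grad-storing Dummy joins the dependency count
                        if src_op.stores_grad:
                            dependency_count[src_op] = 0
                            queue.append(src_op)
                    else:
                        dependency_count[src_op] = 0
                        queue.append(src_op)
                if src_op in dependency_count:
                    dependency_count[src_op] += 1
        return dependency_count

    w = Dummy(stores_grad=True)    # parameter creator: must be counted
    x = Dummy(stores_grad=False)   # input creator: safely ignored
    loss = Operation(srcs=[Operation(srcs=[w, x])])

    counts = infer_dependency(loss)
    assert w in counts and x not in counts

The assertion shows the point of the patch: the input's Dummy never
enters dependency_count, so backward never allocates or holds a gradient
for it.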