Posted to commits@singa.apache.org by wa...@apache.org on 2018/08/16 08:44:30 UTC

[2/4] incubator-singa git commit: SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

An alternative solution:
- let Dummy operations take part in the dependency counting system (see the illustrative sketch below)
- modify the former backward function accordingly
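
A minimal standalone sketch of the dependency-counting idea (illustration only,
not code from this commit: the Op class and the 4-tuple layout of `src` are
simplified stand-ins inferred from the diff below), with Dummy-like ops now
counted like any other source op:

    from collections import Counter, deque

    class Op:
        # minimal stand-in for an autograd operation; `src` holds
        # (source_op, y_idx, y, y_stores_grad) tuples, mirroring the diff
        def __init__(self, name, srcs=()):
            self.name = name
            self.src = [(s, 0, None, False) for s in srcs]

        def __repr__(self):
            return self.name

    def infer_dependency(op):
        # walk the graph from the terminal op and count, for every upstream
        # op, how many reachable ops consume one of its outputs; with this
        # commit Dummy ops are counted as well instead of being skipped
        dependency_count = Counter()
        queue = deque([op])
        while len(queue) > 0:
            cur_op = queue.pop()
            for src_op, _, _, _ in cur_op.src:
                if src_op not in dependency_count:
                    dependency_count[src_op] = 0
                    queue.append(src_op)
                dependency_count[src_op] += 1
        return dependency_count

    # x feeds both a and b, which both feed y, so x has two consumers and
    # its gradient is complete only after both contributions have arrived
    x = Op('x')
    a, b = Op('a', [x]), Op('b', [x])
    y = Op('y', [a, b])
    print(infer_dependency(y))  # Counter({x: 2, a: 1, b: 1})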


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/2fea345c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/2fea345c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/2fea345c

Branch: refs/heads/master
Commit: 2fea345cf13df1e3a2511c5bb80732647abc4b45
Parents: 6c28abd
Author: xuewanqi <xu...@outlook.com>
Authored: Fri Aug 10 05:12:31 2018 +0000
Committer: xuewanqi <xu...@outlook.com>
Committed: Mon Aug 13 06:03:28 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/2fea345c/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 007af27..94214fc 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -44,14 +44,15 @@ def infer_dependency(op):
         a Counter instance with the operation as the key,
         and the number of operations that depend on it as the value
     '''
-    # dependency = {}
+    # The dependency of the current op itself is not counted.
+    # If the current op is not a terminal op, this function may only
+    # count the dependencies of one branch of the graph.
     dependency_count = Counter()
     queue = deque([op])
     while len(queue) > 0:
         cur_op = queue.pop()
         for src_op, _, _, _ in cur_op.src:
-            if src_op not in dependency_count and \
-                    (not isinstance(src_op, Dummy)):
+            if src_op not in dependency_count:
                 # dependency[src_op] = [Counter() for _ in src_op.y_id2idx]
                 dependency_count[src_op] = 0
                 queue.append(src_op)
@@ -64,10 +65,7 @@ def infer_dependency(op):
 def gradients(y, dy=None):
     grads = {}  # mapping: x->dx if x.stores_grad
     for p, dp in backward(y, dy):
-        if not grads.has_key(p):
-            grads[p] = dp
-        else: 
-            grads[p] += dp
+        grads[p] = dp
     return grads
 
 
@@ -142,20 +140,21 @@ def backward(y, dy=None):
                     # add the gradient from another children operation that
                     # uses y_idx'th output of src_op as input arg
                     dxs[y_idx] += dx
-            if y_stores_grad:
-                # store the gradient for final return, e.g. if x is parameter
-
-                # g = not_ready[src_op][y_idx]
+            
+            dependency[src_op] -= 1
 
-                g = dx # cannot confirm that the gradient of a parameter has been computed completely; this may break some optimization
-                       # algorithms, since the engine emits a (partial) gradient as soon as it is computed, which can corrupt the records kept by some optimizers.
+            if y_stores_grad:
+                if dependency[src_op] == 0:
+                    # store the gradient for final return, e.g. if x is a parameter
+                    # may delay the output: the gradient is yielded only once the whole src_op is ready, not as soon as this particular output of src_op is ready.
+                    g = not_ready[src_op][y_idx]
+                    tg = Tensor(device=g.device(), data=g)
+                    yield (y, tg)
 
-                tg = Tensor(device=g.device(), data=g)
-                yield (y, tg)
-            dependency[src_op] -= 1
             if src_op.requires_grad is True:
                 if dependency[src_op] == 0:
                     if not isinstance(src_op, Dummy):
+                        # a Dummy op may appear in the not_ready list but must never be put into the ready list.
                         ready.append((src_op, not_ready[src_op]))
                     del not_ready[src_op]
         del op  # delete the operation to free all tensors from this op
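
The net effect of the patched backward() is that a parameter's gradient is now
yielded exactly once, only after dependency[src_op] has dropped to zero, i.e.
after every downstream contribution has been accumulated into
not_ready[src_op][y_idx]; gradients() can therefore use a plain assignment
instead of summing partial results. A standalone toy sketch of this
emit-once-when-the-counter-reaches-zero pattern (not SINGA code; the function
name, the string keys and the plain-float gradients are purely illustrative):

    from collections import Counter

    def yield_when_ready(partials, dependency):
        # `partials` maps a parameter name to the partial gradients arriving
        # from its consumer ops; `dependency` counts those consumers. The
        # accumulated gradient is emitted only after the last contribution
        # has been folded in, never in a half-finished state.
        accumulated = Counter()
        for param, contributions in partials.items():
            for dx in contributions:
                accumulated[param] += dx
                dependency[param] -= 1
                if dependency[param] == 0:
                    yield param, accumulated[param]

    deps = Counter({'w': 2, 'b': 1})             # w has two consumers, b has one
    parts = {'w': [0.5, 1.5], 'b': [0.25]}
    print(dict(yield_when_ready(parts, deps)))   # {'w': 2.0, 'b': 0.25}

In the real engine the accumulation happens in dxs[y_idx] / not_ready[src_op]
and the finished gradient is wrapped into a Tensor before being yielded, as the
diff above shows.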