Posted to commits@systemml.apache.org by ni...@apache.org on 2019/03/24 04:52:30 UTC

[systemml] branch master updated: [SYSTEMML-540] Invoke update immediately after backward call at the script-level.

This is an automated email from the ASF dual-hosted git repository.

niketanpansare pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git


The following commit(s) were added to refs/heads/master by this push:
     new b49ac76  [SYSTEMML-540] Invoke update immediately after backward call at the script-level.
b49ac76 is described below

commit b49ac760c180baa0582c2168c4b58fb3c0108bc4
Author: Niketan Pansare <np...@us.ibm.com>
AuthorDate: Sat Mar 23 21:51:45 2019 -0700

    [SYSTEMML-540] Invoke update immediately after backward call at the script-level.
    
    - This reduces the chance of unnecessary evictions, especially when there are statement block cuts (see the sketch below).
    - The configuration property `perform_fused_backward_update` allows the user to toggle this behavior and control the script-generation process.
    - Also, updated the release creation document to ensure that untracked files are not included in the artifacts.
    - For ResNet-200, the eviction time was reduced from 173.488 seconds to 60.048 seconds with a minibatch size of 96.
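
    A minimal sketch of the two orderings in plain Python pseudocode (not the
    generated DML; `layers`, `backward`, and `update` are hypothetical stand-ins
    for the layer-wise calls that Caffe2DML emits):

        def unfused_step(layers):
            # Full backward pass first: every layer's gradient stays live until
            # the final update loop, which increases buffer pressure and evictions.
            for layer in reversed(layers):
                layer.backward()
            for layer in layers:
                layer.update()

        def fused_step(layers):
            # Fused: update each layer as soon as its gradient is computed,
            # so per-layer gradients can be released much earlier.
            for layer in reversed(layers):
                layer.backward()
                layer.update()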
---
 docs/release-creation-process.md                   |  3 +
 src/main/python/systemml/mllearn/estimators.py     |  8 ++-
 .../scala/org/apache/sysml/api/dl/Caffe2DML.scala  | 76 ++++++++++++++++++----
 .../scala/org/apache/sysml/api/dl/CaffeLayer.scala |  2 +
 4 files changed, 76 insertions(+), 13 deletions(-)

diff --git a/docs/release-creation-process.md b/docs/release-creation-process.md
index 3115390..bf05000 100644
--- a/docs/release-creation-process.md
+++ b/docs/release-creation-process.md
@@ -42,6 +42,9 @@ Step 1: Prepare the release.
 
 	# Extract latest code to a directory
 	<GitRepoHome>
+	
+	# Check if there are any untracked files (created by the unit tests) and remove them to avoid packing them in the artifacts
+	git status
 
 	# Go to dev/release directory
 	cd <GitRepoHome>/dev/release
diff --git a/src/main/python/systemml/mllearn/estimators.py b/src/main/python/systemml/mllearn/estimators.py
index 8d1e164..456280b 100644
--- a/src/main/python/systemml/mllearn/estimators.py
+++ b/src/main/python/systemml/mllearn/estimators.py
@@ -922,7 +922,8 @@ class Caffe2DML(BaseSystemMLClassifier):
             self.estimator.setWeightsToIgnore(ignore_weights)
 
     def set(self, debug=None, train_algo=None, test_algo=None, parallel_batches=None,
-            output_activations=None, perform_one_hot_encoding=None, parfor_parameters=None, inline_nn_library=None, use_builtin_lstm_fn=None):
+            output_activations=None, perform_one_hot_encoding=None, parfor_parameters=None, inline_nn_library=None, use_builtin_lstm_fn=None,
+            perform_fused_backward_update=None):
         """
         Set input to Caffe2DML
 
@@ -933,10 +934,11 @@ class Caffe2DML(BaseSystemMLClassifier):
         test_algo: can be minibatch, batch, allreduce_parallel_batches or allreduce (default: minibatch)
         parallel_batches: number of parallel batches
         output_activations: (developer flag) directory to output activations of each layer as csv while prediction. To be used only in batch mode (default: None)
-        perform_one_hot_encoding: should perform one-hot encoding in DML using table function (default: False)
+        perform_one_hot_encoding: should perform one-hot encoding in DML using table function (default: True)
         parfor_parameters: dictionary for parfor parameters when using allreduce-style algorithms (default: "")
         inline_nn_library: whether to inline the NN library when generating DML using Caffe2DML (default: False)
         use_builtin_lstm_fn: whether to use builtin lstm function for LSTM layer (default: True)
+        perform_fused_backward_update: whether to perform the update immediately after the backward pass at the script level. Supported for the minibatch and batch algorithms. (default: True)
         """
         if debug is not None:
             self.estimator.setInput("$debug", str(debug).upper())
@@ -950,6 +952,8 @@ class Caffe2DML(BaseSystemMLClassifier):
             self.estimator.setInput("$parallel_batches", str(parallel_batches))
         if use_builtin_lstm_fn is not None:
             self.estimator.setInput("$use_builtin_lstm_fn", str(use_builtin_lstm_fn).upper())
+        if perform_fused_backward_update is not None:
+            self.estimator.setInput("$perform_fused_backward_update", str(perform_fused_backward_update).upper())
         if output_activations is not None:
             self.estimator.setInput(
                 "$output_activations",
diff --git a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
index e480dfc..c5a20db 100644
--- a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
@@ -304,14 +304,27 @@ class Caffe2DML(val sc: SparkContext,
     net.getLayers.map(layer => {net.getCaffeLayer(layer).debugLayer = isDebug})
     net.getLayers.map(layer => {net.getCaffeLayer(layer).caffe2dmlObj = this})
     net.getLayers.filter(layer => net.getCaffeLayer(layer).isInstanceOf[LSTM]).map(layer => {
-      if (inputs.containsKey("$use_builtin_lstm_fn")) 
-        net.getCaffeLayer(layer).asInstanceOf[LSTM].useBuiltinFunction(inputs.get("$use_builtin_lstm_fn").toLowerCase.toBoolean)
+      net.getCaffeLayer(layer).asInstanceOf[LSTM].useBuiltinFunction(getInputBooleanValue("$use_builtin_lstm_fn")) 
      })
   }
   
   // Comma is included
   def getParforParameters():String = if (inputs.containsKey("$parfor_parameters")) inputs.get("$parfor_parameters") else ""
 
+  def getInputBooleanValue(key:String):Boolean = {
+    if(inputs.containsKey(key))
+      return inputs.get(key).toLowerCase.toBoolean
+    else {
+      key match {
+        case "$debug" => false
+        case "$perform_one_hot_encoding" => true
+        case "$inline_nn_library" => false
+        case "$use_builtin_lstm_fn" => true
+        case "$perform_fused_backward_update" => true
+        case _ => throw new DMLRuntimeException("Unsupported input: " + key)
+      }
+    } 
+  }
   // ================================================================================================
   // The below method parses the provided network and solver file and generates DML script.
   def getTrainingScript(isSingleNode: Boolean): (Script, String, String) = {
@@ -320,16 +333,18 @@ class Caffe2DML(val sc: SparkContext,
     reset // Reset the state of DML generator for training script.
 
     // Flags passed by user
-    val DEBUG_TRAINING = if (inputs.containsKey("$debug")) inputs.get("$debug").toLowerCase.toBoolean else false
-    Caffe2DML.INLINE_NN_LIBRARY = if (inputs.containsKey("$inline_nn_library")) inputs.get("$inline_nn_library").toLowerCase.toBoolean else false
+    val DEBUG_TRAINING = getInputBooleanValue("$debug")
+    Caffe2DML.INLINE_NN_LIBRARY = getInputBooleanValue("$inline_nn_library")
     assign(tabDMLScript, "debug", if (DEBUG_TRAINING) "TRUE" else "FALSE")
     setDebugFlags(DEBUG_TRAINING)
 
     appendHeaders(net, solver, true) // Appends DML corresponding to source and externalFunction statements.
-    val performOneHotEncoding = !inputs.containsKey("$perform_one_hot_encoding") || inputs.get("$perform_one_hot_encoding").toBoolean
+    val performOneHotEncoding = getInputBooleanValue("$perform_one_hot_encoding")
     readInputData(net, true, performOneHotEncoding)         // Read X_full and y_full
     // Initialize the layers and solvers. Reads weights and bias if $weights is set.
     initWeights(net, solver, inputs.containsKey("$weights"), layersToIgnore)
+    
+    val performFusedBackwardUpdate = getInputBooleanValue("$perform_fused_backward_update")
 
     // Split into training and validation set
     // Initializes Caffe2DML.X, Caffe2DML.y, Caffe2DML.XVal, Caffe2DML.yVal and Caffe2DML.numImages
@@ -355,7 +370,13 @@ class Caffe2DML(val sc: SparkContext,
           getTrainingBatch(tabDMLScript)
           // -------------------------------------------------------
           // Perform forward, backward and update on minibatch
-          forward; backward; update
+          forward;
+          if(performFusedBackwardUpdate) {
+            backwardUpdate
+          }
+          else {
+            backward; update
+          }
           // -------------------------------------------------------
           if(solverParam.getDisplay > 0) {
             ifBlock("iter  %% " + solverParam.getDisplay + " == 0") {
@@ -385,7 +406,13 @@ class Caffe2DML(val sc: SparkContext,
           assign(tabDMLScript, "yb", Caffe2DML.y)
           // -------------------------------------------------------
           // Perform forward, backward and update on entire dataset
-          forward; backward; update
+          forward
+          if(performFusedBackwardUpdate) {
+            backwardUpdate
+          }
+          else {
+            backward; update
+          }
           // -------------------------------------------------------
           if(solverParam.getDisplay > 0) {
             // Show training/validation loss every epoch
@@ -416,6 +443,10 @@ class Caffe2DML(val sc: SparkContext,
             rightIndexing(tabDMLScript, "Xb", Caffe2DML.X, "beg", "end")
             rightIndexing(tabDMLScript, "yb", Caffe2DML.y, "beg", "end")
             forward; backward
+            if(performFusedBackwardUpdate && inputs.containsKey("$perform_fused_backward_update")) {
+              // Warn only if the user explicitly asked for the fused update
+              Caffe2DML.LOG.warn("Fused backward update is not supported for allreduce_parallel_batches")
+            }
             flattenGradients
             if(solverParam.getDisplay > 0) {
               ifBlock("(iter + j - 1)  %% " + solverParam.getDisplay + " == 0") {
@@ -447,6 +478,10 @@ class Caffe2DML(val sc: SparkContext,
             assign(tabDMLScript, "Xb", Caffe2DML.X + "[j,]")
             assign(tabDMLScript, "yb", Caffe2DML.y + "[j,]")
             forward; backward
+            if(performFusedBackwardUpdate && inputs.containsKey("$perform_fused_backward_update")) {
+              // Warn only if the user explicitly asked for the fused update
+              Caffe2DML.LOG.warn("Fused backward update is not supported for allreduce_parallel_batches")
+            }
             flattenGradients
           }
           aggregateAggGradients
@@ -513,7 +548,7 @@ class Caffe2DML(val sc: SparkContext,
   }
   
   private def displayTrainingLoss(lossLayer: IsLossLayer, performOneHotEncoding:Boolean): Unit = {
-    val DEBUG_TRAINING = if (inputs.containsKey("$debug")) inputs.get("$debug").toLowerCase.toBoolean else false
+    val DEBUG_TRAINING = getInputBooleanValue("$debug")
     tabDMLScript.append("# Compute training loss & accuracy\n")
     assign(tabDMLScript, "loss", "0"); assign(tabDMLScript, "accuracy", "0")
     lossLayer.computeLoss(dmlScript, numTabs)
@@ -619,6 +654,25 @@ class Caffe2DML(val sc: SparkContext,
     tabDMLScript.append("# Update the parameters\n")
     net.getLayers.map(layer => solver.update(tabDMLScript, net.getCaffeLayer(layer)))
   }
+  private def backwardUpdate(): Unit = {
+    tabDMLScript.append("# Perform backward pass and update the parameters\n")
+    var skippedLayer: String = null
+    for(layer <- net.getLayers.reverse) {
+      val caffeLayer = net.getCaffeLayer(layer)
+      caffeLayer.backward(tabDMLScript, "")
+      if(caffeLayer.isInstanceOf[Scale] && caffeLayer.asInstanceOf[Scale].isPartOfBatchNorm) {
+        skippedLayer = layer // Skip update
+      }
+      else {
+        solver.update(tabDMLScript, caffeLayer) // Perform update of the current layer
+        if(skippedLayer != null) {
+          // And then update the skipped layer (if any)
+          solver.update(tabDMLScript, net.getCaffeLayer(skippedLayer))
+          skippedLayer = null
+        }
+      }
+    }
+  }
   private def initializeGradients(parallel_batches: String): Unit = {
     tabDMLScript.append("# Data structure to store gradients computed in parallel\n")
     if(Caffe2DML.USE_PLUS_EQ) {
@@ -730,13 +784,13 @@ class Caffe2DMLModel(val numClasses: String, val sc: SparkContext, val solver: C
 
     reset // Reset the state of DML generator for training script.
 
-    val DEBUG_PREDICTION = if (estimator.inputs.containsKey("$debug")) estimator.inputs.get("$debug").toLowerCase.toBoolean else false
-    Caffe2DML.INLINE_NN_LIBRARY = if (estimator.inputs.containsKey("$inline_nn_library")) estimator.inputs.get("$inline_nn_library").toLowerCase.toBoolean else false
+    val DEBUG_PREDICTION = estimator.getInputBooleanValue("$debug")
+    Caffe2DML.INLINE_NN_LIBRARY = estimator.getInputBooleanValue("$inline_nn_library")
     assign(tabDMLScript, "debug", if (DEBUG_PREDICTION) "TRUE" else "FALSE")
     estimator.setDebugFlags(DEBUG_PREDICTION)
 
     appendHeaders(net, solver, false) // Appends DML corresponding to source and externalFunction statements.
-    val performOneHotEncoding = !estimator.inputs.containsKey("$perform_one_hot_encoding") || estimator.inputs.get("$perform_one_hot_encoding").toBoolean
+    val performOneHotEncoding = estimator.getInputBooleanValue("$perform_one_hot_encoding")
     readInputData(net, false, performOneHotEncoding)         // Read X_full and y_full
     assign(tabDMLScript, "X", "X_full")
 
diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
index 62323d1..bf86f38 100644
--- a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
@@ -370,6 +370,7 @@ class BatchNorm(val param: LayerParameter, val id: Int, val net: CaffeNetwork) e
       val topLayers = net.getTopLayers(param.getName).map(l => net.getCaffeLayer(l)).toList
       if (topLayers.length != 1 && !topLayers(0).isInstanceOf[Scale]) throw new LanguageException("Only one top layer of type Scale allowed for BatchNorm")
       scaleLayer = topLayers(0).asInstanceOf[Scale]
+      scaleLayer.isPartOfBatchNorm = true
     }
   def numChannels = bottomLayerOutputShape._1
   def Hin         = bottomLayerOutputShape._2
@@ -385,6 +386,7 @@ class Scale(val param: LayerParameter, val id: Int, val net: CaffeNetwork) exten
   override def backward(dmlScript: StringBuilder, outSuffix: String): Unit = assignDoutToDX(dmlScript, outSuffix)
   override def weightShape(): Array[Int]                                   = Array(bottomLayerOutputShape._1.toInt, 1)
   override def biasShape(): Array[Int]                                     = Array(bottomLayerOutputShape._1.toInt, 1)
+  var isPartOfBatchNorm = false
 }
 // ------------------------------------------------------------------
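
The only ordering subtlety in the new backwardUpdate is batch normalization: a
Scale layer flagged as part of a BatchNorm pair has its update deferred until
right after the paired BatchNorm layer has been processed in the reverse
traversal. A simplified Python sketch of that traversal (hypothetical layer
objects with backward/update methods and an is_part_of_batchnorm_scale flag;
the real generator emits DML via solver.update):

    def backward_update(layers):
        """Reverse-order backward pass with immediate per-layer updates,
        deferring the update of a Scale layer that belongs to a BatchNorm pair."""
        deferred = None
        for layer in reversed(layers):
            layer.backward()
            if layer.is_part_of_batchnorm_scale:
                deferred = layer            # defer this layer's update
            else:
                layer.update()              # update the current layer first
                if deferred is not None:
                    deferred.update()       # then the deferred Scale layer
                    deferred = None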