Posted to commits@systemml.apache.org by ni...@apache.org on 2019/03/20 17:55:41 UTC

[systemml] branch master updated: [SYSTEMML-540] Integrate the lstm builtin function in Keras2DML

This is an automated email from the ASF dual-hosted git repository.

niketanpansare pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git


The following commit(s) were added to refs/heads/master by this push:
     new fbd3aab  [SYSTEMML-540] Integrate the lstm builtin function in Keras2DML
fbd3aab is described below

commit fbd3aabbda8027e34744ad97a81f1376cf5f2041
Author: Niketan Pansare <np...@us.ibm.com>
AuthorDate: Wed Mar 20 10:54:48 2019 -0700

    [SYSTEMML-540] Integrate the lstm builtin function in Keras2DML
    
    - Also, migrated the builtin LSTM layer from staging (lstm_staging.dml) to nn (lstm_builtin.dml).
    - Updated the GPU tests.
---
 scripts/nn/layers/conv2d.dml                          |  2 ++
 scripts/nn/layers/lstm.dml                            |  2 ++
 .../nn/layers/{lstm_staging.dml => lstm_builtin.dml}  |  4 ++--
 scripts/nn/layers/max_pool2d.dml                      |  2 ++
 src/main/python/systemml/mllearn/estimators.py        |  5 ++++-
 .../scala/org/apache/sysml/api/dl/Caffe2DML.scala     |  4 ++++
 .../scala/org/apache/sysml/api/dl/CaffeLayer.scala    | 19 +++++++++++++++----
 .../java/org/apache/sysml/test/gpu/LstmCPUTest.java   |  2 +-
 src/test/java/org/apache/sysml/test/gpu/LstmTest.java |  2 +-
 9 files changed, 33 insertions(+), 9 deletions(-)
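
In user-facing terms, this commit adds a use_builtin_lstm_fn option to the set() method of Caffe2DML (and hence Keras2DML, which extends it), toggling the generated DML between the new built-in LSTM operator (lstm_builtin.dml, the default) and the hand-written layer (lstm.dml). A minimal usage sketch in Python follows; the constructor arguments, input shape, and data are illustrative placeholders, not part of this commit:

    # Hypothetical sketch: keras_model, input_shape, X_train, and y_train are
    # placeholders; only set(use_builtin_lstm_fn=...) comes from this commit.
    from systemml.mllearn import Keras2DML

    sysml_model = Keras2DML(spark, keras_model, input_shape=(32, 10))
    sysml_model.set(use_builtin_lstm_fn=True)  # the default: use the lstm builtin operator
    sysml_model.fit(X_train, y_train)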

diff --git a/scripts/nn/layers/conv2d.dml b/scripts/nn/layers/conv2d.dml
index 49d887b..de40668 100644
--- a/scripts/nn/layers/conv2d.dml
+++ b/scripts/nn/layers/conv2d.dml
@@ -21,6 +21,8 @@
 
 /*
  * 2D Convolutional layer.
+ *
+ * Consider using conv2d_builtin.dml for better performance.
  */
 source("nn/util.dml") as util
 
diff --git a/scripts/nn/layers/lstm.dml b/scripts/nn/layers/lstm.dml
index cd1557d..838cc44 100644
--- a/scripts/nn/layers/lstm.dml
+++ b/scripts/nn/layers/lstm.dml
@@ -21,6 +21,8 @@
 
 /*
  * LSTM layer.
+ *
+ * Consider using lstm_builtin.dml for better performance.
  */
 source("nn/layers/sigmoid.dml") as sigmoid
 source("nn/layers/tanh.dml") as tanh
diff --git a/scripts/nn/layers/lstm_staging.dml b/scripts/nn/layers/lstm_builtin.dml
similarity index 98%
rename from scripts/nn/layers/lstm_staging.dml
rename to scripts/nn/layers/lstm_builtin.dml
index f1934da..95661f8 100644
--- a/scripts/nn/layers/lstm_staging.dml
+++ b/scripts/nn/layers/lstm_builtin.dml
@@ -21,9 +21,9 @@
 
 /*
  * LSTM layer.
+ * 
+ * This implementation uses a built-in operator for higher performance.
  */
-source("nn/layers/sigmoid.dml") as sigmoid
-source("nn/layers/tanh.dml") as tanh
 
 forward = function(matrix[double] X, matrix[double] W, matrix[double] b, 
                    boolean return_sequences, matrix[double] out0, matrix[double] c0)
diff --git a/scripts/nn/layers/max_pool2d.dml b/scripts/nn/layers/max_pool2d.dml
index fba1a4c..ee57141 100644
--- a/scripts/nn/layers/max_pool2d.dml
+++ b/scripts/nn/layers/max_pool2d.dml
@@ -21,6 +21,8 @@
 
 /*
  * Max Pooling layer.
+ *
+ * Consider using max_pool2d_builtin.dml for better performance.
  */
 source("nn/util.dml") as util
 
diff --git a/src/main/python/systemml/mllearn/estimators.py b/src/main/python/systemml/mllearn/estimators.py
index 144cf66..d6aa8e8 100644
--- a/src/main/python/systemml/mllearn/estimators.py
+++ b/src/main/python/systemml/mllearn/estimators.py
@@ -924,7 +924,7 @@ class Caffe2DML(BaseSystemMLClassifier):
             self.estimator.setWeightsToIgnore(ignore_weights)
 
     def set(self, debug=None, train_algo=None, test_algo=None, parallel_batches=None,
-            output_activations=None, perform_one_hot_encoding=None, parfor_parameters=None, inline_nn_library=None):
+            output_activations=None, perform_one_hot_encoding=None, parfor_parameters=None, inline_nn_library=None, use_builtin_lstm_fn=None):
         """
         Set input to Caffe2DML
 
@@ -938,6 +938,7 @@ class Caffe2DML(BaseSystemMLClassifier):
         perform_one_hot_encoding: should perform one-hot encoding in DML using table function (default: False)
         parfor_parameters: dictionary for parfor parameters when using allreduce-style algorithms (default: "")
         inline_nn_library: whether to inline the NN library when generating DML using Caffe2DML (default: False)
+        use_builtin_lstm_fn: whether to use the builtin lstm function for the LSTM layer (default: True)
         """
         if debug is not None:
             self.estimator.setInput("$debug", str(debug).upper())
@@ -949,6 +950,8 @@ class Caffe2DML(BaseSystemMLClassifier):
             self.estimator.setInput("$test_algo", str(test_algo).lower())
         if parallel_batches is not None:
             self.estimator.setInput("$parallel_batches", str(parallel_batches))
+        if use_builtin_lstm_fn is not None:
+            self.estimator.setInput("$use_builtin_lstm_fn", str(use_builtin_lstm_fn).upper())
         if output_activations is not None:
             self.estimator.setInput(
                 "$output_activations",
diff --git a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
index 13f8a65..e480dfc 100644
--- a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
@@ -303,6 +303,10 @@ class Caffe2DML(val sc: SparkContext,
   def setDebugFlags(isDebug:Boolean):Unit = {
     net.getLayers.map(layer => {net.getCaffeLayer(layer).debugLayer = isDebug})
     net.getLayers.map(layer => {net.getCaffeLayer(layer).caffe2dmlObj = this})
+    net.getLayers.filter(layer => net.getCaffeLayer(layer).isInstanceOf[LSTM]).map(layer => {
+      if (inputs.containsKey("$use_builtin_lstm_fn")) 
+        net.getCaffeLayer(layer).asInstanceOf[LSTM].useBuiltinFunction(inputs.get("$use_builtin_lstm_fn").toLowerCase.toBoolean)
+     })
   }
   
   // Comma is included
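
Together, the two hunks above wire the flag end to end: the Python side serializes the boolean as the string "TRUE" or "FALSE" (str(...).upper(), matching DML's boolean literals), and setDebugFlags reads it back with toLowerCase.toBoolean and flips every LSTM layer in the network. Opting out is a one-liner; sysml_model is the hypothetical estimator from the earlier sketch:

    # Fall back to the hand-written LSTM layer (nn/layers/lstm.dml).
    sysml_model.set(use_builtin_lstm_fn=False)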
diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
index f405fb2..47920ca 100644
--- a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
@@ -986,6 +986,10 @@ class RNN(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extends
 class LSTM(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extends CaffeLayer with HasWeight with HasBias {
   val return_sequences = param.getRecurrentParam.getReturnSequences
   
+  var _useBuiltinFunction = true
+  def useBuiltinFunction(enabled:Boolean): Unit = {
+    _useBuiltinFunction = enabled
+  }
   // ---------------------------------------------------------
  // Note: since Caffe does not have return_sequences, the number of outputs is the same as the number of neurons
   def M():String = param.getRecurrentParam.getNumOutput.toString
@@ -994,7 +998,7 @@ class LSTM(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extend
   def timesteps():String = bottomLayerOutputShape._1
   def input_features():String = bottomLayerOutputShape._2
   def output_features():Int = param.getRecurrentParam.getNumOutput
-  override def sourceFileName = "lstm"
+  override def sourceFileName = if(_useBuiltinFunction) "lstm_builtin" else "lstm" 
   override def outputShape               = if(return_sequences) (timesteps, output_features.toString, "1") else (output_features.toString, "1", "1")
   override def biasShape(): Array[Int]   = Array(1, 4*M.toInt)
   override def weightShape(): Array[Int] = Array(input_features.toInt + M.toInt, 4*M.toInt)
@@ -1009,17 +1013,24 @@ class LSTM(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extend
     val N:String = null // output_features.toString
     val T = timesteps()
     val D = input_features()
-    invokeForward(dmlScript, List[String](out, c, cache_out, cache_c, cache_ifog), X, weight, bias, T, D, return_sequences.toString.toUpperCase, out0, c0)
+    if(_useBuiltinFunction)
+      invokeForward(dmlScript, List[String](out, c, cache_out), X, weight, bias, return_sequences.toString.toUpperCase, out0, c0)
+    else
+      invokeForward(dmlScript, List[String](out, c, cache_out, cache_c, cache_ifog), X, weight, bias, T, D, return_sequences.toString.toUpperCase, out0, c0)
   }
   
   override def backward(dmlScript: StringBuilder, outSuffix: String) = {
     val T = timesteps()
     val D = input_features()
-    invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias, dout0, dc0), dout, dc0, X, weight, bias,
+    if(_useBuiltinFunction)
+      invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias, dout0, dc0), dout, dc0, X, weight, bias,
+        T, D, return_sequences.toString.toUpperCase, out0, c0, cache_out)
+    else
+      invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias, dout0, dc0), dout, dc0, X, weight, bias,
         T, D, return_sequences.toString.toUpperCase, out0, c0, cache_out, cache_c, cache_ifog)
   }
   
-  val cache_out = "cache_out_" + id
+  def cache_out() = if(_useBuiltinFunction) ("lstm_state_" + id) else ("cache_out_" + id)
   val out0 = "out0_" + id
   val dout0 = "dout0_" + id
   val c0 = "cellState0_" + id
diff --git a/src/test/java/org/apache/sysml/test/gpu/LstmCPUTest.java b/src/test/java/org/apache/sysml/test/gpu/LstmCPUTest.java
index 828a809..faa014e 100644
--- a/src/test/java/org/apache/sysml/test/gpu/LstmCPUTest.java
+++ b/src/test/java/org/apache/sysml/test/gpu/LstmCPUTest.java
@@ -34,7 +34,7 @@ public class LstmCPUTest extends GPUTests {
 	private final static String TEST_NAME = "LstmTests";
 	private final int seed = 42;
 	
-	private final static String builtinDML = "\"nn/layers/lstm_staging.dml\"";
+	private final static String builtinDML = "\"nn/layers/lstm_builtin.dml\"";
 	private final static String nnDML = "\"nn/layers/lstm.dml\"";
 
 	@Override
diff --git a/src/test/java/org/apache/sysml/test/gpu/LstmTest.java b/src/test/java/org/apache/sysml/test/gpu/LstmTest.java
index 996b12a..ffc6099 100644
--- a/src/test/java/org/apache/sysml/test/gpu/LstmTest.java
+++ b/src/test/java/org/apache/sysml/test/gpu/LstmTest.java
@@ -36,7 +36,7 @@ public class LstmTest extends GPUTests {
 	private final static String TEST_NAME = "LstmTests";
 	private final int seed = 42;
 	
-	private final static String builtinDML = "\"nn/layers/lstm_staging.dml\"";
+	private final static String builtinDML = "\"nn/layers/lstm_builtin.dml\"";
 	private final static String nnDML = "\"nn/layers/lstm.dml\"";
 
 	@Override
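
The renamed constants above point LstmTest and LstmCPUTest at lstm_builtin.dml, so the suites keep exercising the built-in operator alongside the hand-written layer (nnDML). A similar equivalence check can be sketched against the Python API; everything here except set(use_builtin_lstm_fn=...) is a hypothetical placeholder:

    import numpy as np
    from systemml.mllearn import Keras2DML

    # Hypothetical: two wrappers around the same Keras model inherit identical
    # weights, so the two LSTM code paths should agree on the forward pass.
    m_builtin = Keras2DML(spark, keras_model, input_shape=(32, 10))
    m_builtin.set(use_builtin_lstm_fn=True)
    m_handwritten = Keras2DML(spark, keras_model, input_shape=(32, 10))
    m_handwritten.set(use_builtin_lstm_fn=False)

    np.testing.assert_allclose(m_builtin.predict(X_test),
                               m_handwritten.predict(X_test), atol=1e-4)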