You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2019/03/20 17:55:41 UTC
[systemml] branch master updated: [SYSTEMML-540] Integrate the lstm builtin function in Keras2DML
This is an automated email from the ASF dual-hosted git repository.
niketanpansare pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push:
new fbd3aab [SYSTEMML-540] Integrate the lstm builtin function in Keras2DML
fbd3aab is described below
commit fbd3aabbda8027e34744ad97a81f1376cf5f2041
Author: Niketan Pansare <np...@us.ibm.com>
AuthorDate: Wed Mar 20 10:54:48 2019 -0700
[SYSTEMML-540] Integrate the lstm builtin function in Keras2DML
- Also, migrated the builtin function layer from staging (lstm_staging.dml) to nn (lstm_builtin.dml).
- Updated the GPU tests.
---
scripts/nn/layers/conv2d.dml | 2 ++
scripts/nn/layers/lstm.dml | 2 ++
.../nn/layers/{lstm_staging.dml => lstm_builtin.dml} | 4 ++--
scripts/nn/layers/max_pool2d.dml | 2 ++
src/main/python/systemml/mllearn/estimators.py | 5 ++++-
.../scala/org/apache/sysml/api/dl/Caffe2DML.scala | 4 ++++
.../scala/org/apache/sysml/api/dl/CaffeLayer.scala | 19 +++++++++++++++----
.../java/org/apache/sysml/test/gpu/LstmCPUTest.java | 2 +-
src/test/java/org/apache/sysml/test/gpu/LstmTest.java | 2 +-
9 files changed, 33 insertions(+), 9 deletions(-)
diff --git a/scripts/nn/layers/conv2d.dml b/scripts/nn/layers/conv2d.dml
index 49d887b..de40668 100644
--- a/scripts/nn/layers/conv2d.dml
+++ b/scripts/nn/layers/conv2d.dml
@@ -21,6 +21,8 @@
/*
* 2D Convolutional layer.
+ *
+ * Consider using conv2d_builtin.dml for better performance.
*/
source("nn/util.dml") as util
diff --git a/scripts/nn/layers/lstm.dml b/scripts/nn/layers/lstm.dml
index cd1557d..838cc44 100644
--- a/scripts/nn/layers/lstm.dml
+++ b/scripts/nn/layers/lstm.dml
@@ -21,6 +21,8 @@
/*
* LSTM layer.
+ *
+ * Consider using lstm_builtin.dml for better performance.
*/
source("nn/layers/sigmoid.dml") as sigmoid
source("nn/layers/tanh.dml") as tanh
diff --git a/scripts/nn/layers/lstm_staging.dml b/scripts/nn/layers/lstm_builtin.dml
similarity index 98%
rename from scripts/nn/layers/lstm_staging.dml
rename to scripts/nn/layers/lstm_builtin.dml
index f1934da..95661f8 100644
--- a/scripts/nn/layers/lstm_staging.dml
+++ b/scripts/nn/layers/lstm_builtin.dml
@@ -21,9 +21,9 @@
/*
* LSTM layer.
+ *
+ * This implementation uses a built-in operator for higher performance.
*/
-source("nn/layers/sigmoid.dml") as sigmoid
-source("nn/layers/tanh.dml") as tanh
forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
boolean return_sequences, matrix[double] out0, matrix[double] c0)
diff --git a/scripts/nn/layers/max_pool2d.dml b/scripts/nn/layers/max_pool2d.dml
index fba1a4c..ee57141 100644
--- a/scripts/nn/layers/max_pool2d.dml
+++ b/scripts/nn/layers/max_pool2d.dml
@@ -21,6 +21,8 @@
/*
* Max Pooling layer.
+ *
+ * Consider using max_pool2d_builtin.dml for better performance.
*/
source("nn/util.dml") as util
diff --git a/src/main/python/systemml/mllearn/estimators.py b/src/main/python/systemml/mllearn/estimators.py
index 144cf66..d6aa8e8 100644
--- a/src/main/python/systemml/mllearn/estimators.py
+++ b/src/main/python/systemml/mllearn/estimators.py
@@ -924,7 +924,7 @@ class Caffe2DML(BaseSystemMLClassifier):
self.estimator.setWeightsToIgnore(ignore_weights)
def set(self, debug=None, train_algo=None, test_algo=None, parallel_batches=None,
- output_activations=None, perform_one_hot_encoding=None, parfor_parameters=None, inline_nn_library=None):
+ output_activations=None, perform_one_hot_encoding=None, parfor_parameters=None, inline_nn_library=None, use_builtin_lstm_fn=None):
"""
Set input to Caffe2DML
@@ -938,6 +938,7 @@ class Caffe2DML(BaseSystemMLClassifier):
perform_one_hot_encoding: should perform one-hot encoding in DML using table function (default: False)
parfor_parameters: dictionary for parfor parameters when using allreduce-style algorithms (default: "")
inline_nn_library: whether to inline the NN library when generating DML using Caffe2DML (default: False)
+ use_builtin_lstm_fn: whether to use builtin lstm function for LSTM layer (default: True)
"""
if debug is not None:
self.estimator.setInput("$debug", str(debug).upper())
@@ -949,6 +950,8 @@ class Caffe2DML(BaseSystemMLClassifier):
self.estimator.setInput("$test_algo", str(test_algo).lower())
if parallel_batches is not None:
self.estimator.setInput("$parallel_batches", str(parallel_batches))
+ if use_builtin_lstm_fn is not None:
+ self.estimator.setInput("$use_builtin_lstm_fn", str(use_builtin_lstm_fn).upper())
if output_activations is not None:
self.estimator.setInput(
"$output_activations",
diff --git a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
index 13f8a65..e480dfc 100644
--- a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
@@ -303,6 +303,10 @@ class Caffe2DML(val sc: SparkContext,
def setDebugFlags(isDebug:Boolean):Unit = {
net.getLayers.map(layer => {net.getCaffeLayer(layer).debugLayer = isDebug})
net.getLayers.map(layer => {net.getCaffeLayer(layer).caffe2dmlObj = this})
+ net.getLayers.filter(layer => net.getCaffeLayer(layer).isInstanceOf[LSTM]).map(layer => {
+ if (inputs.containsKey("$use_builtin_lstm_fn"))
+ net.getCaffeLayer(layer).asInstanceOf[LSTM].useBuiltinFunction(inputs.get("$use_builtin_lstm_fn").toLowerCase.toBoolean)
+ })
}
// Comma is included
diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
index f405fb2..47920ca 100644
--- a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
@@ -986,6 +986,10 @@ class RNN(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extends
class LSTM(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extends CaffeLayer with HasWeight with HasBias {
val return_sequences = param.getRecurrentParam.getReturnSequences
+ var _useBuiltinFunction = true
+ def useBuiltinFunction(enabled:Boolean): Unit = {
+ _useBuiltinFunction = enabled
+ }
// ---------------------------------------------------------
// Note: since Caffe doesnot have return_sequences, number of output is same as number of neurons
def M():String = param.getRecurrentParam.getNumOutput.toString
@@ -994,7 +998,7 @@ class LSTM(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extend
def timesteps():String = bottomLayerOutputShape._1
def input_features():String = bottomLayerOutputShape._2
def output_features():Int = param.getRecurrentParam.getNumOutput
- override def sourceFileName = "lstm"
+ override def sourceFileName = if(_useBuiltinFunction) "lstm_builtin" else "lstm"
override def outputShape = if(return_sequences) (timesteps, output_features.toString, "1") else (output_features.toString, "1", "1")
override def biasShape(): Array[Int] = Array(1, 4*M.toInt)
override def weightShape(): Array[Int] = Array(input_features.toInt + M.toInt, 4*M.toInt)
@@ -1009,17 +1013,24 @@ class LSTM(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extend
val N:String = null // output_features.toString
val T = timesteps()
val D = input_features()
- invokeForward(dmlScript, List[String](out, c, cache_out, cache_c, cache_ifog), X, weight, bias, T, D, return_sequences.toString.toUpperCase, out0, c0)
+ if(_useBuiltinFunction)
+ invokeForward(dmlScript, List[String](out, c, cache_out), X, weight, bias, return_sequences.toString.toUpperCase, out0, c0)
+ else
+ invokeForward(dmlScript, List[String](out, c, cache_out, cache_c, cache_ifog), X, weight, bias, T, D, return_sequences.toString.toUpperCase, out0, c0)
}
override def backward(dmlScript: StringBuilder, outSuffix: String) = {
val T = timesteps()
val D = input_features()
- invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias, dout0, dc0), dout, dc0, X, weight, bias,
+ if(_useBuiltinFunction)
+ invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias, dout0, dc0), dout, dc0, X, weight, bias,
+ T, D, return_sequences.toString.toUpperCase, out0, c0, cache_out)
+ else
+ invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias, dout0, dc0), dout, dc0, X, weight, bias,
T, D, return_sequences.toString.toUpperCase, out0, c0, cache_out, cache_c, cache_ifog)
}
- val cache_out = "cache_out_" + id
+ def cache_out() = if(_useBuiltinFunction) ("lstm_state_" + id) else ("cache_out_" + id)
val out0 = "out0_" + id
val dout0 = "dout0_" + id
val c0 = "cellState0_" + id
diff --git a/src/test/java/org/apache/sysml/test/gpu/LstmCPUTest.java b/src/test/java/org/apache/sysml/test/gpu/LstmCPUTest.java
index 828a809..faa014e 100644
--- a/src/test/java/org/apache/sysml/test/gpu/LstmCPUTest.java
+++ b/src/test/java/org/apache/sysml/test/gpu/LstmCPUTest.java
@@ -34,7 +34,7 @@ public class LstmCPUTest extends GPUTests {
private final static String TEST_NAME = "LstmTests";
private final int seed = 42;
- private final static String builtinDML = "\"nn/layers/lstm_staging.dml\"";
+ private final static String builtinDML = "\"nn/layers/lstm_builtin.dml\"";
private final static String nnDML = "\"nn/layers/lstm.dml\"";
@Override
diff --git a/src/test/java/org/apache/sysml/test/gpu/LstmTest.java b/src/test/java/org/apache/sysml/test/gpu/LstmTest.java
index 996b12a..ffc6099 100644
--- a/src/test/java/org/apache/sysml/test/gpu/LstmTest.java
+++ b/src/test/java/org/apache/sysml/test/gpu/LstmTest.java
@@ -36,7 +36,7 @@ public class LstmTest extends GPUTests {
private final static String TEST_NAME = "LstmTests";
private final int seed = 42;
- private final static String builtinDML = "\"nn/layers/lstm_staging.dml\"";
+ private final static String builtinDML = "\"nn/layers/lstm_builtin.dml\"";
private final static String nnDML = "\"nn/layers/lstm.dml\"";
@Override